import sys
import string
-import regex
+import re
import getopt
import time
for rev in allrevs:
formatrev(rev, prefix)
-parsedateprog = regex.compile(
- '^date: \([0-9]+\)/\([0-9]+\)/\([0-9]+\) ' +
- '\([0-9]+\):\([0-9]+\):\([0-9]+\); author: \([^ ;]+\)')
+parsedateprog = re.compile(
+ '^date: ([0-9]+)/([0-9]+)/([0-9]+) ' +
+ '([0-9]+):([0-9]+):([0-9]+); author: ([^ ;]+)')
authormap = {
'guido': 'Guido van Rossum <guido@cnri.reston.va.us>',
print
print
-startprog = regex.compile("^Working file: \(.*\)$")
+startprog = re.compile("^Working file: (.*)$")
def getnextfile(f):
while 1:
# Python script for bumping up an RCS major revision number.
import sys
-import regex
+import re
import rcslib
import string
WITHLOCK = 1
-majorrev_re = regex.compile('^[0-9]+')
+majorrev_re = re.compile('^[0-9]+')
dir = rcslib.RCS()
import fnmatch
import os
-import regsub
+import re
import string
import tempfile
cmd = 'ci %s%s -t%s %s %s' % \
(lockflag, rev, f.name, otherflags, name)
else:
- message = regsub.gsub('\([\\"$`]\)', '\\\\\\1', message)
+ message = re.sub(r'([\\"$`])', r'\\\1', message)
cmd = 'ci %s%s -m"%s" %s %s' % \
(lockflag, rev, message, otherflags, name)
return self._system(cmd)
# into a program for a different change to Python programs...
import sys
-import regex
+import re
import os
from stat import *
import string
if fix(arg): bad = 1
sys.exit(bad)
-ispythonprog = regex.compile('^[a-zA-Z0-9_]+\.py$')
+ispythonprog = re.compile('^[a-zA-Z0-9_]+\.py$')
def ispython(name):
-    return ispythonprog.match(name) >= 0
+    return ispythonprog.match(name) is not None
if lineno == 1 and g is None and line[:2] == '#!':
# Check for non-Python scripts
words = string.split(line[2:])
- if words and regex.search('[pP]ython', words[0]) < 0:
+ if words and not re.search('[pP]ython', words[0]):
msg = filename + ': ' + words[0]
msg = msg + ' script; not fixed\n'
err(msg)
import os
import sys
-import regex
+import re
import string
import getopt
-pat = '^\([a-zA-Z0-9 :]*\)!\(.*\)!\(.*\)!\([<>].*\)!\([0-9]+\)!\([0-9]+\)$'
-prog = regex.compile(pat)
+pat = '^([a-zA-Z0-9 :]*)!(.*)!(.*)!([<>].*)!([0-9]+)!([0-9]+)$'
+prog = re.compile(pat)
def main():
maxitems = 25
import os
import stat
import getopt
-import regex
+import re
def main():
dofile = mmdf
if sts:
sys.exit(sts)
-numeric = regex.compile('[1-9][0-9]*')
+numeric = re.compile('[1-9][0-9]*')
def mh(dir):
sts = 0
import os
import sys
-import regex
+import re
-pat = '^\([^: \t\n]+\):\([1-9][0-9]*\):'
-prog = regex.compile(pat)
+pat = '^([^: \t\n]+):([1-9][0-9]*):'
+prog = re.compile(pat)
class FileObj:
def __init__(self, filename):
import sys
import time
import struct
-import regsub
from socket import *
# Widget to display a man page
-import regex
+import re
from Tkinter import *
from Tkinter import _tkinter
from ScrolledText import ScrolledText
# XXX Recognizing footers is system dependent
# (This one works for IRIX 5.2 and Solaris 2.2)
-footerprog = regex.compile(
+footerprog = re.compile(
-    '^ Page [1-9][0-9]*[ \t]+\|^.*Last change:.*[1-9][0-9]*\n')
+    '^ Page [1-9][0-9]*[ \t]+|^.*Last change:.*[1-9][0-9]*\n')
-emptyprog = regex.compile('^[ \t]*\n')
-ulprog = regex.compile('^[ \t]*[Xv!_][Xv!_ \t]*\n')
+emptyprog = re.compile('^[ \t]*\n')
+ulprog = re.compile('^[ \t]*[Xv!_][Xv!_ \t]*\n')
# Basic Man Page class -- does not disable editing
class EditableManPage(ScrolledText):
import os
import sys
-import regex
+import re
import getopt
import string
import mhlib
scanmenu.unpost()
scanmenu.invoke('active')
-scanparser = regex.compile('^ *\([0-9]+\)')
+scanparser = re.compile('^ *([0-9]+)')
def open_folder(e=None):
global folder, mhf
import sys
import os
import string
-import regex
+import re
from Tkinter import *
from ManPage import ManPage
print 'Empty search string'
return
if not self.casevar.get():
- map = regex.casefold
+ map = re.IGNORECASE
else:
map = None
try:
if map:
- prog = regex.compile(search, map)
+ prog = re.compile(search, map)
else:
- prog = regex.compile(search)
- except regex.error, msg:
+ prog = re.compile(search)
+ except re.error, msg:
self.frame.bell()
print 'Regex error:', msg
return
The \module{re} module was added in Python 1.5, and provides
Perl-style regular expression patterns. Earlier versions of Python
-came with the \module{regex} module, which provides Emacs-style
-patterns. Emacs-style patterns are slightly less readable and
-don't provide as many features, so there's not much reason to use
-the \module{regex} module when writing new code, though you might
-encounter old code that uses it.
+came with the \module{regex} module, which provided Emacs-style
+patterns. The \module{regex} module was removed in Python 2.5.
Regular expressions (or REs) are essentially a tiny, highly
specialized programming language embedded inside Python and made
by O'Reilly. Unfortunately, it exclusively concentrates on Perl and
Java's flavours of regular expressions, and doesn't contain any Python
material at all, so it won't be useful as a reference for programming
-in Python. (The first edition covered Python's now-obsolete
+in Python. (The first edition covered Python's now-removed
\module{regex} module, which won't help you much.) Consider checking
it out from your library.
\input{libstrings} % String Services
\input{libstring}
\input{libre}
-\input{libreconvert}
\input{libstruct} % XXX also/better in File Formats?
\input{libdifflib}
\input{libstringio}
%\input{libcmpcache}
%\input{libcmp}
%\input{libni}
-%\input{libregex}
-%\input{libregsub}
\chapter{Reporting Bugs}
\input{reportingbugs}
>>> re.split('\W+', 'Words, words, words.', 1)
['Words', 'words, words.']
\end{verbatim}
-
- This function combines and extends the functionality of
- the old \function{regsub.split()} and \function{regsub.splitx()}.
\end{funcdesc}
\begin{funcdesc}{findall}{pattern, string\optional{, flags}}
>>> re.match('Begin (\w| )*? end', s).end()
Traceback (most recent call last):
File "<stdin>", line 1, in ?
- File "/usr/local/lib/python2.3/sre.py", line 132, in match
+ File "/usr/local/lib/python2.5/re.py", line 132, in match
return _compile(pattern, flags).match(string)
RuntimeError: maximum recursion limit exceeded
\end{verbatim}
+++ /dev/null
-\section{\module{reconvert} ---
- Convert regular expressions from regex to re form}
-\declaremodule{standard}{reconvert}
-\moduleauthor{Andrew M. Kuchling}{amk@amk.ca}
-\sectionauthor{Skip Montanaro}{skip@pobox.com}
-
-
-\modulesynopsis{Convert regex-, emacs- or sed-style regular expressions
-to re-style syntax.}
-
-
-This module provides a facility to convert regular expressions from the
-syntax used by the deprecated \module{regex} module to those used by the
-newer \module{re} module. Because of similarity between the regular
-expression syntax of \code{sed(1)} and \code{emacs(1)} and the
-\module{regex} module, it is also helpful to convert patterns written for
-those tools to \module{re} patterns.
-
-When used as a script, a Python string literal (or any other expression
-evaluating to a string) is read from stdin, and the translated expression is
-written to stdout as a string literal. Unless stdout is a tty, no trailing
-newline is written to stdout. This is done so that it can be used with
-Emacs \code{C-U M-|} (shell-command-on-region) which filters the region
-through the shell command.
-
-\begin{seealso}
- \seetitle{Mastering Regular Expressions}{Book on regular expressions
- by Jeffrey Friedl, published by O'Reilly. The second
- edition of the book no longer covers Python at all,
- but the first edition covered writing good regular expression
- patterns in great detail.}
-\end{seealso}
-
-\subsection{Module Contents}
-\nodename{Contents of Module reconvert}
-
-The module defines two functions and a handful of constants.
-
-\begin{funcdesc}{convert}{pattern\optional{, syntax=None}}
- Convert a \var{pattern} representing a \module{regex}-stype regular
- expression into a \module{re}-style regular expression. The optional
- \var{syntax} parameter is a bitwise-or'd set of flags that control what
- constructs are converted. See below for a description of the various
- constants.
-\end{funcdesc}
-
-\begin{funcdesc}{quote}{s\optional{, quote=None}}
- Convert a string object to a quoted string literal.
-
- This is similar to \function{repr} but will return a "raw" string (r'...'
- or r"...") when the string contains backslashes, instead of doubling all
- backslashes. The resulting string does not always evaluate to the same
- string as the original; however it will do just the right thing when passed
- into re.compile().
-
- The optional second argument forces the string quote; it must be a single
- character which is a valid Python string quote. Note that prior to Python
- 2.5 this would not accept triple-quoted string delimiters.
-\end{funcdesc}
-
-\begin{datadesc}{RE_NO_BK_PARENS}
- Suppress paren conversion. This should be omitted when converting
- \code{sed}-style or \code{emacs}-style regular expressions.
-\end{datadesc}
-
-\begin{datadesc}{RE_NO_BK_VBAR}
- Suppress vertical bar conversion. This should be omitted when converting
- \code{sed}-style or \code{emacs}-style regular expressions.
-\end{datadesc}
-
-\begin{datadesc}{RE_BK_PLUS_QM}
- Enable conversion of \code{+} and \code{?} characters. This should be
- added to the \var{syntax} arg of \function{convert} when converting
- \code{sed}-style regular expressions and omitted when converting
- \code{emacs}-style regular expressions.
-\end{datadesc}
-
-\begin{datadesc}{RE_NEWLINE_OR}
- When set, newline characters are replaced by \code{|}.
-\end{datadesc}
+++ /dev/null
-\section{\module{regex} ---
- Regular expression operations}
-\declaremodule{builtin}{regex}
-
-\modulesynopsis{Regular expression search and match operations.
- \strong{Obsolete!}}
-
-
-This module provides regular expression matching operations similar to
-those found in Emacs.
-
-\strong{Obsolescence note:}
-This module is obsolete as of Python version 1.5; it is still being
-maintained because much existing code still uses it. All new code in
-need of regular expressions should use the new
-\code{re}\refstmodindex{re} module, which supports the more powerful
-and regular Perl-style regular expressions. Existing code should be
-converted. The standard library module
-\code{reconvert}\refstmodindex{reconvert} helps in converting
-\code{regex} style regular expressions to \code{re}\refstmodindex{re}
-style regular expressions. (For more conversion help, see Andrew
-Kuchling's\index{Kuchling, Andrew} ``\module{regex-to-re} HOWTO'' at
-\url{http://www.python.org/doc/howto/regex-to-re/}.)
-
-By default the patterns are Emacs-style regular expressions
-(with one exception). There is
-a way to change the syntax to match that of several well-known
-\UNIX{} utilities. The exception is that Emacs' \samp{\e s}
-pattern is not supported, since the original implementation references
-the Emacs syntax tables.
-
-This module is 8-bit clean: both patterns and strings may contain null
-bytes and characters whose high bit is set.
-
-\strong{Please note:} There is a little-known fact about Python string
-literals which means that you don't usually have to worry about
-doubling backslashes, even though they are used to escape special
-characters in string literals as well as in regular expressions. This
-is because Python doesn't remove backslashes from string literals if
-they are followed by an unrecognized escape character.
-\emph{However}, if you want to include a literal \dfn{backslash} in a
-regular expression represented as a string literal, you have to
-\emph{quadruple} it or enclose it in a singleton character class.
-E.g.\ to extract \LaTeX\ \samp{\e section\{\textrm{\ldots}\}} headers
-from a document, you can use this pattern:
-\code{'[\e ]section\{\e (.*\e )\}'}. \emph{Another exception:}
-the escape sequence \samp{\e b} is significant in string literals
-(where it means the ASCII bell character) as well as in Emacs regular
-expressions (where it stands for a word boundary), so in order to
-search for a word boundary, you should use the pattern \code{'\e \e b'}.
-Similarly, a backslash followed by a digit 0-7 should be doubled to
-avoid interpretation as an octal escape.
-
-\subsection{Regular Expressions}
-
-A regular expression (or RE) specifies a set of strings that matches
-it; the functions in this module let you check if a particular string
-matches a given regular expression (or if a given regular expression
-matches a particular string, which comes down to the same thing).
-
-Regular expressions can be concatenated to form new regular
-expressions; if \emph{A} and \emph{B} are both regular expressions,
-then \emph{AB} is also an regular expression. If a string \emph{p}
-matches A and another string \emph{q} matches B, the string \emph{pq}
-will match AB. Thus, complex expressions can easily be constructed
-from simpler ones like the primitives described here. For details of
-the theory and implementation of regular expressions, consult almost
-any textbook about compiler construction.
-
-% XXX The reference could be made more specific, say to
-% "Compilers: Principles, Techniques and Tools", by Alfred V. Aho,
-% Ravi Sethi, and Jeffrey D. Ullman, or some FA text.
-
-A brief explanation of the format of regular expressions follows.
-
-Regular expressions can contain both special and ordinary characters.
-Ordinary characters, like '\code{A}', '\code{a}', or '\code{0}', are
-the simplest regular expressions; they simply match themselves. You
-can concatenate ordinary characters, so '\code{last}' matches the
-characters 'last'. (In the rest of this section, we'll write RE's in
-\code{this special font}, usually without quotes, and strings to be
-matched 'in single quotes'.)
-
-Special characters either stand for classes of ordinary characters, or
-affect how the regular expressions around them are interpreted.
-
-The special characters are:
-\begin{itemize}
-\item[\code{.}] (Dot.) Matches any character except a newline.
-\item[\code{\^}] (Caret.) Matches the start of the string.
-\item[\code{\$}] Matches the end of the string.
-\code{foo} matches both 'foo' and 'foobar', while the regular
-expression '\code{foo\$}' matches only 'foo'.
-\item[\code{*}] Causes the resulting RE to
-match 0 or more repetitions of the preceding RE. \code{ab*} will
-match 'a', 'ab', or 'a' followed by any number of 'b's.
-\item[\code{+}] Causes the
-resulting RE to match 1 or more repetitions of the preceding RE.
-\code{ab+} will match 'a' followed by any non-zero number of 'b's; it
-will not match just 'a'.
-\item[\code{?}] Causes the resulting RE to
-match 0 or 1 repetitions of the preceding RE. \code{ab?} will
-match either 'a' or 'ab'.
-
-\item[\code{\e}] Either escapes special characters (permitting you to match
-characters like '*?+\&\$'), or signals a special sequence; special
-sequences are discussed below. Remember that Python also uses the
-backslash as an escape sequence in string literals; if the escape
-sequence isn't recognized by Python's parser, the backslash and
-subsequent character are included in the resulting string. However,
-if Python would recognize the resulting sequence, the backslash should
-be repeated twice.
-
-\item[\code{[]}] Used to indicate a set of characters. Characters can
-be listed individually, or a range is indicated by giving two
-characters and separating them by a '-'. Special characters are
-not active inside sets. For example, \code{[akm\$]}
-will match any of the characters 'a', 'k', 'm', or '\$'; \code{[a-z]} will
-match any lowercase letter.
-
-If you want to include a \code{]} inside a
-set, it must be the first character of the set; to include a \code{-},
-place it as the first or last character.
-
-Characters \emph{not} within a range can be matched by including a
-\code{\^} as the first character of the set; \code{\^} elsewhere will
-simply match the '\code{\^}' character.
-\end{itemize}
-
-The special sequences consist of '\code{\e}' and a character
-from the list below. If the ordinary character is not on the list,
-then the resulting RE will match the second character. For example,
-\code{\e\$} matches the character '\$'. Ones where the backslash
-should be doubled in string literals are indicated.
-
-\begin{itemize}
-\item[\code{\e|}]\code{A\e|B}, where A and B can be arbitrary REs,
-creates a regular expression that will match either A or B. This can
-be used inside groups (see below) as well.
-%
-\item[\code{\e( \e)}] Indicates the start and end of a group; the
-contents of a group can be matched later in the string with the
-\code{\e [1-9]} special sequence, described next.
-\end{itemize}
-
-\begin{fulllineitems}
-\item[\code{\e \e 1, ... \e \e 7, \e 8, \e 9}]
-Matches the contents of the group of the same
-number. For example, \code{\e (.+\e ) \e \e 1} matches 'the the' or
-'55 55', but not 'the end' (note the space after the group). This
-special sequence can only be used to match one of the first 9 groups;
-groups with higher numbers can be matched using the \code{\e v}
-sequence. (\code{\e 8} and \code{\e 9} don't need a double backslash
-because they are not octal digits.)
-\end{fulllineitems}
-
-\begin{itemize}
-\item[\code{\e \e b}] Matches the empty string, but only at the
-beginning or end of a word. A word is defined as a sequence of
-alphanumeric characters, so the end of a word is indicated by
-whitespace or a non-alphanumeric character.
-%
-\item[\code{\e B}] Matches the empty string, but when it is \emph{not} at the
-beginning or end of a word.
-%
-\item[\code{\e v}] Must be followed by a two digit decimal number, and
-matches the contents of the group of the same number. The group
-number must be between 1 and 99, inclusive.
-%
-\item[\code{\e w}]Matches any alphanumeric character; this is
-equivalent to the set \code{[a-zA-Z0-9]}.
-%
-\item[\code{\e W}] Matches any non-alphanumeric character; this is
-equivalent to the set \code{[\^a-zA-Z0-9]}.
-\item[\code{\e <}] Matches the empty string, but only at the beginning of a
-word. A word is defined as a sequence of alphanumeric characters, so
-the end of a word is indicated by whitespace or a non-alphanumeric
-character.
-\item[\code{\e >}] Matches the empty string, but only at the end of a
-word.
-
-\item[\code{\e \e \e \e}] Matches a literal backslash.
-
-% In Emacs, the following two are start of buffer/end of buffer. In
-% Python they seem to be synonyms for ^$.
-\item[\code{\e `}] Like \code{\^}, this only matches at the start of the
-string.
-\item[\code{\e \e '}] Like \code{\$}, this only matches at the end of
-the string.
-% end of buffer
-\end{itemize}
-
-\subsection{Module Contents}
-\nodename{Contents of Module regex}
-
-The module defines these functions, and an exception:
-
-
-\begin{funcdesc}{match}{pattern, string}
- Return how many characters at the beginning of \var{string} match
- the regular expression \var{pattern}. Return \code{-1} if the
- string does not match the pattern (this is different from a
- zero-length match!).
-\end{funcdesc}
-
-\begin{funcdesc}{search}{pattern, string}
- Return the first position in \var{string} that matches the regular
- expression \var{pattern}. Return \code{-1} if no position in the string
- matches the pattern (this is different from a zero-length match
- anywhere!).
-\end{funcdesc}
-
-\begin{funcdesc}{compile}{pattern\optional{, translate}}
- Compile a regular expression pattern into a regular expression
- object, which can be used for matching using its \code{match()} and
- \code{search()} methods, described below. The optional argument
- \var{translate}, if present, must be a 256-character string
- indicating how characters (both of the pattern and of the strings to
- be matched) are translated before comparing them; the \var{i}-th
- element of the string gives the translation for the character with
- \ASCII{} code \var{i}. This can be used to implement
- case-insensitive matching; see the \code{casefold} data item below.
-
- The sequence
-
-\begin{verbatim}
-prog = regex.compile(pat)
-result = prog.match(str)
-\end{verbatim}
-%
-is equivalent to
-
-\begin{verbatim}
-result = regex.match(pat, str)
-\end{verbatim}
-
-but the version using \code{compile()} is more efficient when multiple
-regular expressions are used concurrently in a single program. (The
-compiled version of the last pattern passed to \code{regex.match()} or
-\code{regex.search()} is cached, so programs that use only a single
-regular expression at a time needn't worry about compiling regular
-expressions.)
-\end{funcdesc}
-
-\begin{funcdesc}{set_syntax}{flags}
- Set the syntax to be used by future calls to \code{compile()},
- \code{match()} and \code{search()}. (Already compiled expression
- objects are not affected.) The argument is an integer which is the
- OR of several flag bits. The return value is the previous value of
- the syntax flags. Names for the flags are defined in the standard
- module \code{regex_syntax}\refstmodindex{regex_syntax}; read the
- file \file{regex_syntax.py} for more information.
-\end{funcdesc}
-
-\begin{funcdesc}{get_syntax}{}
- Returns the current value of the syntax flags as an integer.
-\end{funcdesc}
-
-\begin{funcdesc}{symcomp}{pattern\optional{, translate}}
-This is like \code{compile()}, but supports symbolic group names: if a
-parenthesis-enclosed group begins with a group name in angular
-brackets, e.g. \code{'\e(<id>[a-z][a-z0-9]*\e)'}, the group can
-be referenced by its name in arguments to the \code{group()} method of
-the resulting compiled regular expression object, like this:
-\code{p.group('id')}. Group names may contain alphanumeric characters
-and \code{'_'} only.
-\end{funcdesc}
-
-\begin{excdesc}{error}
- Exception raised when a string passed to one of the functions here
- is not a valid regular expression (e.g., unmatched parentheses) or
- when some other error occurs during compilation or matching. (It is
- never an error if a string contains no match for a pattern.)
-\end{excdesc}
-
-\begin{datadesc}{casefold}
-A string suitable to pass as the \var{translate} argument to
-\code{compile()} to map all upper case characters to their lowercase
-equivalents.
-\end{datadesc}
-
-\noindent
-Compiled regular expression objects support these methods:
-
-\setindexsubitem{(regex method)}
-\begin{funcdesc}{match}{string\optional{, pos}}
- Return how many characters at the beginning of \var{string} match
- the compiled regular expression. Return \code{-1} if the string
- does not match the pattern (this is different from a zero-length
- match!).
-
- The optional second parameter, \var{pos}, gives an index in the string
- where the search is to start; it defaults to \code{0}. This is not
- completely equivalent to slicing the string; the \code{'\^'} pattern
- character matches at the real beginning of the string and at positions
- just after a newline, not necessarily at the index where the search
- is to start.
-\end{funcdesc}
-
-\begin{funcdesc}{search}{string\optional{, pos}}
- Return the first position in \var{string} that matches the regular
- expression \code{pattern}. Return \code{-1} if no position in the
- string matches the pattern (this is different from a zero-length
- match anywhere!).
-
- The optional second parameter has the same meaning as for the
- \code{match()} method.
-\end{funcdesc}
-
-\begin{funcdesc}{group}{index, index, ...}
-This method is only valid when the last call to the \code{match()}
-or \code{search()} method found a match. It returns one or more
-groups of the match. If there is a single \var{index} argument,
-the result is a single string; if there are multiple arguments, the
-result is a tuple with one item per argument. If the \var{index} is
-zero, the corresponding return value is the entire matching string; if
-it is in the inclusive range [1..99], it is the string matching the
-corresponding parenthesized group (using the default syntax,
-groups are parenthesized using \code{{\e}(} and \code{{\e})}). If no
-such group exists, the corresponding result is \code{None}.
-
-If the regular expression was compiled by \code{symcomp()} instead of
-\code{compile()}, the \var{index} arguments may also be strings
-identifying groups by their group name.
-\end{funcdesc}
-
-\noindent
-Compiled regular expressions support these data attributes:
-
-\setindexsubitem{(regex attribute)}
-
-\begin{datadesc}{regs}
-When the last call to the \code{match()} or \code{search()} method found a
-match, this is a tuple of pairs of indexes corresponding to the
-beginning and end of all parenthesized groups in the pattern. Indices
-are relative to the string argument passed to \code{match()} or
-\code{search()}. The 0-th tuple gives the beginning and end or the
-whole pattern. When the last match or search failed, this is
-\code{None}.
-\end{datadesc}
-
-\begin{datadesc}{last}
-When the last call to the \code{match()} or \code{search()} method found a
-match, this is the string argument passed to that method. When the
-last match or search failed, this is \code{None}.
-\end{datadesc}
-
-\begin{datadesc}{translate}
-This is the value of the \var{translate} argument to
-\code{regex.compile()} that created this regular expression object. If
-the \var{translate} argument was omitted in the \code{regex.compile()}
-call, this is \code{None}.
-\end{datadesc}
-
-\begin{datadesc}{givenpat}
-The regular expression pattern as passed to \code{compile()} or
-\code{symcomp()}.
-\end{datadesc}
-
-\begin{datadesc}{realpat}
-The regular expression after stripping the group names for regular
-expressions compiled with \code{symcomp()}. Same as \code{givenpat}
-otherwise.
-\end{datadesc}
-
-\begin{datadesc}{groupindex}
-A dictionary giving the mapping from symbolic group names to numerical
-group indexes for regular expressions compiled with \code{symcomp()}.
-\code{None} otherwise.
-\end{datadesc}
+++ /dev/null
-\section{\module{regsub} ---
- String operations using regular expressions}
-
-\declaremodule{standard}{regsub}
-\modulesynopsis{Substitution and splitting operations that use
- regular expressions. \strong{Obsolete!}}
-
-
-This module defines a number of functions useful for working with
-regular expressions (see built-in module \refmodule{regex}).
-
-Warning: these functions are not thread-safe.
-
-\strong{Obsolescence note:}
-This module is obsolete as of Python version 1.5; it is still being
-maintained because much existing code still uses it. All new code in
-need of regular expressions should use the new \refmodule{re} module, which
-supports the more powerful and regular Perl-style regular expressions.
-Existing code should be converted. The standard library module
-\module{reconvert} helps in converting \refmodule{regex} style regular
-expressions to \refmodule{re} style regular expressions. (For more
-conversion help, see Andrew Kuchling's\index{Kuchling, Andrew}
-``regex-to-re HOWTO'' at
-\url{http://www.python.org/doc/howto/regex-to-re/}.)
-
-
-\begin{funcdesc}{sub}{pat, repl, str}
-Replace the first occurrence of pattern \var{pat} in string
-\var{str} by replacement \var{repl}. If the pattern isn't found,
-the string is returned unchanged. The pattern may be a string or an
-already compiled pattern. The replacement may contain references
-\samp{\e \var{digit}} to subpatterns and escaped backslashes.
-\end{funcdesc}
-
-\begin{funcdesc}{gsub}{pat, repl, str}
-Replace all (non-overlapping) occurrences of pattern \var{pat} in
-string \var{str} by replacement \var{repl}. The same rules as for
-\code{sub()} apply. Empty matches for the pattern are replaced only
-when not adjacent to a previous match, so e.g.
-\code{gsub('', '-', 'abc')} returns \code{'-a-b-c-'}.
-\end{funcdesc}
-
-\begin{funcdesc}{split}{str, pat\optional{, maxsplit}}
-Split the string \var{str} in fields separated by delimiters matching
-the pattern \var{pat}, and return a list containing the fields. Only
-non-empty matches for the pattern are considered, so e.g.
-\code{split('a:b', ':*')} returns \code{['a', 'b']} and
-\code{split('abc', '')} returns \code{['abc']}. The \var{maxsplit}
-defaults to 0. If it is nonzero, only \var{maxsplit} number of splits
-occur, and the remainder of the string is returned as the final
-element of the list.
-\end{funcdesc}
-
-\begin{funcdesc}{splitx}{str, pat\optional{, maxsplit}}
-Split the string \var{str} in fields separated by delimiters matching
-the pattern \var{pat}, and return a list containing the fields as well
-as the separators. For example, \code{splitx('a:::b', ':*')} returns
-\code{['a', ':::', 'b']}. Otherwise, this function behaves the same
-as \code{split}.
-\end{funcdesc}
-
-\begin{funcdesc}{capwords}{s\optional{, pat}}
-Capitalize words separated by optional pattern \var{pat}. The default
-pattern uses any characters except letters, digits and underscores as
-word delimiters. Capitalization is done by changing the first
-character of each word to upper case.
-\end{funcdesc}
-
-\begin{funcdesc}{clear_cache}{}
-The regsub module maintains a cache of compiled regular expressions,
-keyed on the regular expression string and the syntax of the regex
-module at the time the expression was compiled. This function clears
-that cache.
-\end{funcdesc}
\item[\module{rand}]
--- Old interface to the random number generator.
-\item[\module{regex}]
---- Emacs-style regular expression support; may still be used in some
-old code (extension module). Refer to the
-\citetitle[http://www.python.org/doc/1.6/lib/module-regex.html]{Python
-1.6 Documentation} for documentation.
-
-\item[\module{regsub}]
---- Regular expression based string replacement utilities, for use
-with \module{regex} (extension module). Refer to the
-\citetitle[http://www.python.org/doc/1.6/lib/module-regsub.html]{Python
-1.6 Documentation} for documentation.
-
\item[\module{statcache}]
--- Caches the results of os.stat(). Using the cache can be fragile
and error-prone, just use \code{os.stat()} directly.
+++ /dev/null
-# Text formatting abstractions
-# Note -- this module is obsolete, it's too slow anyway
-
-
-# Oft-used type object
-Int = type(0)
-
-
-# Represent a paragraph. This is a list of words with associated
-# font and size information, plus indents and justification for the
-# entire paragraph.
-# Once the words have been added to a paragraph, it can be laid out
-# for different line widths. Once laid out, it can be rendered at
-# different screen locations. Once rendered, it can be queried
-# for mouse hits, and parts of the text can be highlighted
-class Para:
- #
- def __init__(self):
- self.words = [] # The words
- self.just = 'l' # Justification: 'l', 'r', 'lr' or 'c'
- self.indent_left = self.indent_right = self.indent_hang = 0
- # Final lay-out parameters, may change
- self.left = self.top = self.right = self.bottom = \
- self.width = self.height = self.lines = None
- #
- # Add a word, computing size information for it.
- # Words may also be added manually by appending to self.words
- # Each word should be a 7-tuple:
- # (font, text, width, space, stretch, ascent, descent)
- def addword(self, d, font, text, space, stretch):
- if font is not None:
- d.setfont(font)
- width = d.textwidth(text)
- ascent = d.baseline()
- descent = d.lineheight() - ascent
- spw = d.textwidth(' ')
- space = space * spw
- stretch = stretch * spw
- tuple = (font, text, width, space, stretch, ascent, descent)
- self.words.append(tuple)
- #
- # Hooks to begin and end anchors -- insert numbers in the word list!
- def bgn_anchor(self, id):
- self.words.append(id)
- #
- def end_anchor(self, id):
- self.words.append(0)
- #
- # Return the total length (width) of the text added so far, in pixels
- def getlength(self):
- total = 0
- for word in self.words:
- if type(word) is not Int:
- total = total + word[2] + word[3]
- return total
- #
- # Tab to a given position (relative to the current left indent):
- # remove all stretch, add fixed space up to the new indent.
- # If the current position is already at the tab stop,
- # don't add any new space (but still remove the stretch)
- def tabto(self, tab):
- total = 0
- as, de = 1, 0
- for i in range(len(self.words)):
- word = self.words[i]
- if type(word) is Int: continue
- (fo, te, wi, sp, st, as, de) = word
- self.words[i] = (fo, te, wi, sp, 0, as, de)
- total = total + wi + sp
- if total < tab:
- self.words.append((None, '', 0, tab-total, 0, as, de))
- #
- # Make a hanging tag: tab to hang, increment indent_left by hang,
- # and reset indent_hang to -hang
- def makehangingtag(self, hang):
- self.tabto(hang)
- self.indent_left = self.indent_left + hang
- self.indent_hang = -hang
- #
- # Decide where the line breaks will be given some screen width
- def layout(self, linewidth):
- self.width = linewidth
- height = 0
- self.lines = lines = []
- avail1 = self.width - self.indent_left - self.indent_right
- avail = avail1 - self.indent_hang
- words = self.words
- i = 0
- n = len(words)
- lastfont = None
- while i < n:
- firstfont = lastfont
- charcount = 0
- width = 0
- stretch = 0
- ascent = 0
- descent = 0
- lsp = 0
- j = i
- while i < n:
- word = words[i]
- if type(word) is Int:
- if word > 0 and width >= avail:
- break
- i = i+1
- continue
- fo, te, wi, sp, st, as, de = word
- if width + wi > avail and width > 0 and wi > 0:
- break
- if fo is not None:
- lastfont = fo
- if width == 0:
- firstfont = fo
- charcount = charcount + len(te) + (sp > 0)
- width = width + wi + sp
- lsp = sp
- stretch = stretch + st
- lst = st
- ascent = max(ascent, as)
- descent = max(descent, de)
- i = i+1
- while i > j and type(words[i-1]) is Int and \
- words[i-1] > 0: i = i-1
- width = width - lsp
- if i < n:
- stretch = stretch - lst
- else:
- stretch = 0
- tuple = i-j, firstfont, charcount, width, stretch, \
- ascent, descent
- lines.append(tuple)
- height = height + ascent + descent
- avail = avail1
- self.height = height
- #
- # Call a function for all words in a line
- def visit(self, wordfunc, anchorfunc):
- avail1 = self.width - self.indent_left - self.indent_right
- avail = avail1 - self.indent_hang
- v = self.top
- i = 0
- for tuple in self.lines:
- wordcount, firstfont, charcount, width, stretch, \
- ascent, descent = tuple
- h = self.left + self.indent_left
- if i == 0: h = h + self.indent_hang
- extra = 0
- if self.just == 'r': h = h + avail - width
- elif self.just == 'c': h = h + (avail - width) / 2
- elif self.just == 'lr' and stretch > 0:
- extra = avail - width
- v2 = v + ascent + descent
- for j in range(i, i+wordcount):
- word = self.words[j]
- if type(word) is Int:
- ok = anchorfunc(self, tuple, word, \
- h, v)
- if ok is not None: return ok
- continue
- fo, te, wi, sp, st, as, de = word
- if extra > 0 and stretch > 0:
- ex = extra * st / stretch
- extra = extra - ex
- stretch = stretch - st
- else:
- ex = 0
- h2 = h + wi + sp + ex
- ok = wordfunc(self, tuple, word, h, v, \
- h2, v2, (j==i), (j==i+wordcount-1))
- if ok is not None: return ok
- h = h2
- v = v2
- i = i + wordcount
- avail = avail1
- #
- # Render a paragraph in "drawing object" d, using the rectangle
- # given by (left, top, right) with an unspecified bottom.
- # Return the computed bottom of the text.
- def render(self, d, left, top, right):
- if self.width != right-left:
- self.layout(right-left)
- self.left = left
- self.top = top
- self.right = right
- self.bottom = self.top + self.height
- self.anchorid = 0
- try:
- self.d = d
- self.visit(self.__class__._renderword, \
- self.__class__._renderanchor)
- finally:
- self.d = None
- return self.bottom
- #
- def _renderword(self, tuple, word, h, v, h2, v2, isfirst, islast):
- if word[0] is not None: self.d.setfont(word[0])
- baseline = v + tuple[5]
- self.d.text((h, baseline - word[5]), word[1])
- if self.anchorid > 0:
- self.d.line((h, baseline+2), (h2, baseline+2))
- #
- def _renderanchor(self, tuple, word, h, v):
- self.anchorid = word
- #
- # Return which anchor(s) was hit by the mouse
- def hitcheck(self, mouseh, mousev):
- self.mouseh = mouseh
- self.mousev = mousev
- self.anchorid = 0
- self.hits = []
- self.visit(self.__class__._hitcheckword, \
- self.__class__._hitcheckanchor)
- return self.hits
- #
- def _hitcheckword(self, tuple, word, h, v, h2, v2, isfirst, islast):
- if self.anchorid > 0 and h <= self.mouseh <= h2 and \
- v <= self.mousev <= v2:
- self.hits.append(self.anchorid)
- #
- def _hitcheckanchor(self, tuple, word, h, v):
- self.anchorid = word
- #
- # Return whether the given anchor id is present
- def hasanchor(self, id):
- return id in self.words or -id in self.words
- #
- # Extract the raw text from the word list, substituting one space
- # for non-empty inter-word space, and terminating with '\n'
- def extract(self):
- text = ''
- for w in self.words:
- if type(w) is not Int:
- word = w[1]
- if w[3]: word = word + ' '
- text = text + word
- return text + '\n'
- #
- # Return which character position was hit by the mouse, as
- # an offset in the entire text as returned by extract().
- # Return None if the mouse was not in this paragraph
- def whereis(self, d, mouseh, mousev):
- if mousev < self.top or mousev > self.bottom:
- return None
- self.mouseh = mouseh
- self.mousev = mousev
- self.lastfont = None
- self.charcount = 0
- try:
- self.d = d
- return self.visit(self.__class__._whereisword, \
- self.__class__._whereisanchor)
- finally:
- self.d = None
- #
- def _whereisword(self, tuple, word, h1, v1, h2, v2, isfirst, islast):
- fo, te, wi, sp, st, as, de = word
- if fo is not None: self.lastfont = fo
- h = h1
- if isfirst: h1 = 0
- if islast: h2 = 999999
- if not (v1 <= self.mousev <= v2 and h1 <= self.mouseh <= h2):
- self.charcount = self.charcount + len(te) + (sp > 0)
- return
- if self.lastfont is not None:
- self.d.setfont(self.lastfont)
- cc = 0
- for c in te:
- cw = self.d.textwidth(c)
- if self.mouseh <= h + cw/2:
- return self.charcount + cc
- cc = cc+1
- h = h+cw
- self.charcount = self.charcount + cc
- if self.mouseh <= (h+h2) / 2:
- return self.charcount
- else:
- return self.charcount + 1
- #
- def _whereisanchor(self, tuple, word, h, v):
- pass
- #
- # Return screen position corresponding to position in paragraph.
- # Return tuple (h, vtop, vbaseline, vbottom).
- # This is more or less the inverse of whereis()
- def screenpos(self, d, pos):
- if pos < 0:
- ascent, descent = self.lines[0][5:7]
- return self.left, self.top, self.top + ascent, \
- self.top + ascent + descent
- self.pos = pos
- self.lastfont = None
- try:
- self.d = d
- ok = self.visit(self.__class__._screenposword, \
- self.__class__._screenposanchor)
- finally:
- self.d = None
- if ok is None:
- ascent, descent = self.lines[-1][5:7]
- ok = self.right, self.bottom - ascent - descent, \
- self.bottom - descent, self.bottom
- return ok
- #
- def _screenposword(self, tuple, word, h1, v1, h2, v2, isfirst, islast):
- fo, te, wi, sp, st, as, de = word
- if fo is not None: self.lastfont = fo
- cc = len(te) + (sp > 0)
- if self.pos > cc:
- self.pos = self.pos - cc
- return
- if self.pos < cc:
- self.d.setfont(self.lastfont)
- h = h1 + self.d.textwidth(te[:self.pos])
- else:
- h = h2
- ascent, descent = tuple[5:7]
- return h, v1, v1+ascent, v2
- #
- def _screenposanchor(self, tuple, word, h, v):
- pass
- #
- # Invert the stretch of text between pos1 and pos2.
- # If pos1 is None, the beginning is implied;
- # if pos2 is None, the end is implied.
- # Undoes its own effect when called again with the same arguments
- def invert(self, d, pos1, pos2):
- if pos1 is None:
- pos1 = self.left, self.top, self.top, self.top
- else:
- pos1 = self.screenpos(d, pos1)
- if pos2 is None:
- pos2 = self.right, self.bottom,self.bottom,self.bottom
- else:
- pos2 = self.screenpos(d, pos2)
- h1, top1, baseline1, bottom1 = pos1
- h2, top2, baseline2, bottom2 = pos2
- if bottom1 <= top2:
- d.invert((h1, top1), (self.right, bottom1))
- h1 = self.left
- if bottom1 < top2:
- d.invert((h1, bottom1), (self.right, top2))
- top1, bottom1 = top2, bottom2
- d.invert((h1, top1), (h2, bottom2))
+++ /dev/null
-# This module provides standard support for "packages".
-#
-# The idea is that large groups of related modules can be placed in
-# their own subdirectory, which can be added to the Python search path
-# in a relatively easy way.
-#
-# The current version takes a package name and searches the Python
-# search path for a directory by that name, and if found adds it to
-# the module search path (sys.path). It maintains a list of packages
-# that have already been added so adding the same package many times
-# is OK.
-#
-# It is intended to be used in a fairly stylized manner: each module
-# that wants to use a particular package, say 'Foo', is supposed to
-# contain the following code:
-#
-# from addpack import addpack
-# addpack('Foo')
-# <import modules from package Foo>
-#
-# Additional arguments, when present, provide additional places where
-# to look for the package before trying sys.path (these may be either
-# strings or lists/tuples of strings). Also, if the package name is a
-# full pathname, first the last component is tried in the usual way,
-# then the full pathname is tried last. If the package name is a
-# *relative* pathname (UNIX: contains a slash but doesn't start with
-# one), then nothing special is done. The packages "/foo/bar/bletch"
-# and "bletch" are considered the same, but unrelated to "bar/bletch".
-#
-# If the algorithm finds more than one suitable subdirectory, all are
-# added to the search path -- this makes it possible to override part
-# of a package. The same path will not be added more than once.
-#
-# If no directory is found, ImportError is raised.
-
-_packs = {} # {pack: [pathname, ...], ...}
-
-def addpack(pack, *locations):
- import os
- if os.path.isabs(pack):
- base = os.path.basename(pack)
- else:
- base = pack
- if _packs.has_key(base):
- return
- import sys
- path = []
- for loc in _flatten(locations) + sys.path:
- fn = os.path.join(loc, base)
- if fn not in path and os.path.isdir(fn):
- path.append(fn)
- if pack != base and pack not in path and os.path.isdir(pack):
- path.append(pack)
- if not path: raise ImportError, 'package ' + pack + ' not found'
- _packs[base] = path
- for fn in path:
- if fn not in sys.path:
- sys.path.append(fn)
-
-def _flatten(locations):
- locs = []
- for loc in locations:
- if type(loc) == type(''):
- locs.append(loc)
- else:
- locs = locs + _flatten(loc)
- return locs
+++ /dev/null
-"""Efficiently compare files, boolean outcome only (equal / not equal).
-
-Tricks (used in this order):
- - Files with identical type, size & mtime are assumed to be clones
- - Files with different type or size cannot be identical
- - We keep a cache of outcomes of earlier comparisons
- - We don't fork a process to run 'cmp' but read the files ourselves
-"""
-
-import os
-
-cache = {}
-
-def cmp(f1, f2, shallow=1):
- """Compare two files, use the cache if possible.
- Return 1 for identical files, 0 for different.
- Raise exceptions if either file could not be statted, read, etc."""
- s1, s2 = sig(os.stat(f1)), sig(os.stat(f2))
- if s1[0] != 8 or s2[0] != 8:
- # Either is a not a plain file -- always report as different
- return 0
- if shallow and s1 == s2:
- # type, size & mtime match -- report same
- return 1
- if s1[:2] != s2[:2]: # Types or sizes differ, don't bother
- # types or sizes differ -- report different
- return 0
- # same type and size -- look in the cache
- key = (f1, f2)
- try:
- cs1, cs2, outcome = cache[key]
- # cache hit
- if s1 == cs1 and s2 == cs2:
- # cached signatures match
- return outcome
- # stale cached signature(s)
- except KeyError:
- # cache miss
- pass
- # really compare
- outcome = do_cmp(f1, f2)
- cache[key] = s1, s2, outcome
- return outcome
-
-def sig(st):
- """Return signature (i.e., type, size, mtime) from raw stat data
- 0-5: st_mode, st_ino, st_dev, st_nlink, st_uid, st_gid
- 6-9: st_size, st_atime, st_mtime, st_ctime"""
- type = st[0] / 4096
- size = st[6]
- mtime = st[8]
- return type, size, mtime
-
-def do_cmp(f1, f2):
- """Compare two files, really."""
- bufsize = 8*1024 # Could be tuned
- fp1 = open(f1, 'rb')
- fp2 = open(f2, 'rb')
- while 1:
- b1 = fp1.read(bufsize)
- b2 = fp2.read(bufsize)
- if b1 != b2: return 0
- if not b1: return 1
+++ /dev/null
-"""Efficiently compare files, boolean outcome only (equal / not equal).
-
-Tricks (used in this order):
- - Use the statcache module to avoid statting files more than once
- - Files with identical type, size & mtime are assumed to be clones
- - Files with different type or size cannot be identical
- - We keep a cache of outcomes of earlier comparisons
- - We don't fork a process to run 'cmp' but read the files ourselves
-"""
-
-import os
-from stat import *
-import statcache
-
-
-# The cache.
-#
-cache = {}
-
-
-def cmp(f1, f2, shallow=1):
- """Compare two files, use the cache if possible.
- May raise os.error if a stat or open of either fails.
- Return 1 for identical files, 0 for different.
- Raise exceptions if either file could not be statted, read, etc."""
- s1, s2 = sig(statcache.stat(f1)), sig(statcache.stat(f2))
- if not S_ISREG(s1[0]) or not S_ISREG(s2[0]):
- # Either is a not a plain file -- always report as different
- return 0
- if shallow and s1 == s2:
- # type, size & mtime match -- report same
- return 1
- if s1[:2] != s2[:2]: # Types or sizes differ, don't bother
- # types or sizes differ -- report different
- return 0
- # same type and size -- look in the cache
- key = f1 + ' ' + f2
- if cache.has_key(key):
- cs1, cs2, outcome = cache[key]
- # cache hit
- if s1 == cs1 and s2 == cs2:
- # cached signatures match
- return outcome
- # stale cached signature(s)
- # really compare
- outcome = do_cmp(f1, f2)
- cache[key] = s1, s2, outcome
- return outcome
-
-def sig(st):
- """Return signature (i.e., type, size, mtime) from raw stat data."""
- return S_IFMT(st[ST_MODE]), st[ST_SIZE], st[ST_MTIME]
-
-def do_cmp(f1, f2):
- """Compare two files, really."""
- #print ' cmp', f1, f2 # XXX remove when debugged
- bufsize = 8*1024 # Could be tuned
- fp1 = open(f1, 'rb')
- fp2 = open(f2, 'rb')
- while 1:
- b1 = fp1.read(bufsize)
- b2 = fp2.read(bufsize)
- if b1 != b2: return 0
- if not b1: return 1
+++ /dev/null
-# A subroutine for extracting a function name from a code object
-# (with cache)
-
-import sys
-from stat import *
-import string
-import os
-import linecache
-
-# XXX The functions getcodename() and getfuncname() are now obsolete
-# XXX as code and function objects now have a name attribute --
-# XXX co.co_name and f.func_name.
-# XXX getlineno() is now also obsolete because of the new attribute
-# XXX of code objects, co.co_firstlineno.
-
-# Extract the function or class name from a code object.
-# This is a bit of a hack, since a code object doesn't contain
-# the name directly. So what do we do:
-# - get the filename (which *is* in the code object)
-# - look in the code string to find the first SET_LINENO instruction
-# (this must be the first instruction)
-# - get the line from the file
-# - if the line starts with 'class' or 'def' (after possible whitespace),
-# extract the following identifier
-#
-# This breaks apart when the function was read from <stdin>
-# or constructed by exec(), when the file is not accessible,
-# and also when the file has been modified or when a line is
-# continued with a backslash before the function or class name.
-#
-# Because this is a pretty expensive hack, a cache is kept.
-
-SET_LINENO = 127 # The opcode (see "opcode.h" in the Python source)
-identchars = string.ascii_letters + string.digits + '_' # Identifier characters
-
-_namecache = {} # The cache
-
-def getcodename(co):
- try:
- return co.co_name
- except AttributeError:
- pass
- key = `co` # arbitrary but uniquely identifying string
- if _namecache.has_key(key): return _namecache[key]
- filename = co.co_filename
- code = co.co_code
- name = ''
- if ord(code[0]) == SET_LINENO:
- lineno = ord(code[1]) | ord(code[2]) << 8
- line = linecache.getline(filename, lineno)
- words = line.split()
- if len(words) >= 2 and words[0] in ('def', 'class'):
- name = words[1]
- for i in range(len(name)):
- if name[i] not in identchars:
- name = name[:i]
- break
- _namecache[key] = name
- return name
-
-# Use the above routine to find a function's name.
-
-def getfuncname(func):
- try:
- return func.func_name
- except AttributeError:
- pass
- return getcodename(func.func_code)
-
-# A part of the above code to extract just the line number from a code object.
-
-def getlineno(co):
- try:
- return co.co_firstlineno
- except AttributeError:
- pass
- code = co.co_code
- if ord(code[0]) == SET_LINENO:
- return ord(code[1]) | ord(code[2]) << 8
- else:
- return -1
+++ /dev/null
-"""A class to build directory diff tools on."""
-
-import os
-
-import dircache
-import cmpcache
-import statcache
-from stat import *
-
-class dircmp:
- """Directory comparison class."""
-
- def new(self, a, b):
- """Initialize."""
- self.a = a
- self.b = b
- # Properties that caller may change before calling self.run():
- self.hide = [os.curdir, os.pardir] # Names never to be shown
- self.ignore = ['RCS', 'tags'] # Names ignored in comparison
-
- return self
-
- def run(self):
- """Compare everything except common subdirectories."""
- self.a_list = filter(dircache.listdir(self.a), self.hide)
- self.b_list = filter(dircache.listdir(self.b), self.hide)
- self.a_list.sort()
- self.b_list.sort()
- self.phase1()
- self.phase2()
- self.phase3()
-
- def phase1(self):
- """Compute common names."""
- self.a_only = []
- self.common = []
- for x in self.a_list:
- if x in self.b_list:
- self.common.append(x)
- else:
- self.a_only.append(x)
-
- self.b_only = []
- for x in self.b_list:
- if x not in self.common:
- self.b_only.append(x)
-
- def phase2(self):
- """Distinguish files, directories, funnies."""
- self.common_dirs = []
- self.common_files = []
- self.common_funny = []
-
- for x in self.common:
- a_path = os.path.join(self.a, x)
- b_path = os.path.join(self.b, x)
-
- ok = 1
- try:
- a_stat = statcache.stat(a_path)
- except os.error, why:
- # print 'Can\'t stat', a_path, ':', why[1]
- ok = 0
- try:
- b_stat = statcache.stat(b_path)
- except os.error, why:
- # print 'Can\'t stat', b_path, ':', why[1]
- ok = 0
-
- if ok:
- a_type = S_IFMT(a_stat[ST_MODE])
- b_type = S_IFMT(b_stat[ST_MODE])
- if a_type != b_type:
- self.common_funny.append(x)
- elif S_ISDIR(a_type):
- self.common_dirs.append(x)
- elif S_ISREG(a_type):
- self.common_files.append(x)
- else:
- self.common_funny.append(x)
- else:
- self.common_funny.append(x)
-
- def phase3(self):
- """Find out differences between common files."""
- xx = cmpfiles(self.a, self.b, self.common_files)
- self.same_files, self.diff_files, self.funny_files = xx
-
- def phase4(self):
- """Find out differences between common subdirectories.
- A new dircmp object is created for each common subdirectory,
- these are stored in a dictionary indexed by filename.
- The hide and ignore properties are inherited from the parent."""
- self.subdirs = {}
- for x in self.common_dirs:
- a_x = os.path.join(self.a, x)
- b_x = os.path.join(self.b, x)
- self.subdirs[x] = newdd = dircmp().new(a_x, b_x)
- newdd.hide = self.hide
- newdd.ignore = self.ignore
- newdd.run()
-
- def phase4_closure(self):
- """Recursively call phase4() on subdirectories."""
- self.phase4()
- for x in self.subdirs.keys():
- self.subdirs[x].phase4_closure()
-
- def report(self):
- """Print a report on the differences between a and b."""
- # Assume that phases 1 to 3 have been executed
- # Output format is purposely lousy
- print 'diff', self.a, self.b
- if self.a_only:
- print 'Only in', self.a, ':', self.a_only
- if self.b_only:
- print 'Only in', self.b, ':', self.b_only
- if self.same_files:
- print 'Identical files :', self.same_files
- if self.diff_files:
- print 'Differing files :', self.diff_files
- if self.funny_files:
- print 'Trouble with common files :', self.funny_files
- if self.common_dirs:
- print 'Common subdirectories :', self.common_dirs
- if self.common_funny:
- print 'Common funny cases :', self.common_funny
-
- def report_closure(self):
- """Print reports on self and on subdirs.
- If phase 4 hasn't been done, no subdir reports are printed."""
- self.report()
- try:
- x = self.subdirs
- except AttributeError:
- return # No subdirectories computed
- for x in self.subdirs.keys():
- print
- self.subdirs[x].report_closure()
-
- def report_phase4_closure(self):
- """Report and do phase 4 recursively."""
- self.report()
- self.phase4()
- for x in self.subdirs.keys():
- print
- self.subdirs[x].report_phase4_closure()
-
-
-def cmpfiles(a, b, common):
- """Compare common files in two directories.
- Return:
- - files that compare equal
- - files that compare different
- - funny cases (can't stat etc.)"""
-
- res = ([], [], [])
- for x in common:
- res[cmp(os.path.join(a, x), os.path.join(b, x))].append(x)
- return res
-
-
-def cmp(a, b):
- """Compare two files.
- Return:
- 0 for equal
- 1 for different
- 2 for funny cases (can't stat, etc.)"""
-
- try:
- if cmpcache.cmp(a, b): return 0
- return 1
- except os.error:
- return 2
-
-
-def filter(list, skip):
- """Return a copy with items that occur in skip removed."""
-
- result = []
- for item in list:
- if item not in skip: result.append(item)
- return result
-
-
-def demo():
- """Demonstration and testing."""
-
- import sys
- import getopt
- options, args = getopt.getopt(sys.argv[1:], 'r')
- if len(args) != 2:
- raise getopt.error, 'need exactly two args'
- dd = dircmp().new(args[0], args[1])
- dd.run()
- if ('-r', '') in options:
- dd.report_phase4_closure()
- else:
- dd.report()
-
-if __name__ == "__main__":
- demo()
+++ /dev/null
-# Module 'dump'
-#
-# Print python code that reconstructs a variable.
-# This only works in certain cases.
-#
-# It works fine for:
-# - ints and floats (except NaNs and other weird things)
-# - strings
-# - compounds and lists, provided it works for all their elements
-# - imported modules, provided their name is the module name
-#
-# It works for top-level dictionaries but not for dictionaries
-# contained in other objects (could be made to work with some hassle
-# though).
-#
-# It does not work for functions (all sorts), classes, class objects,
-# windows, files etc.
-#
-# Finally, objects referenced by more than one name or contained in more
-# than one other object lose their sharing property (this is bad for
-# strings used as exception identifiers, for instance).
-
-# Dump a whole symbol table
-#
-def dumpsymtab(dict):
- for key in dict.keys():
- dumpvar(key, dict[key])
-
-# Dump a single variable
-#
-def dumpvar(name, x):
- import sys
- t = type(x)
- if t == type({}):
- print name, '= {}'
- for key in x.keys():
- item = x[key]
- if not printable(item):
- print '#',
- print name, '[', `key`, '] =', `item`
- elif t in (type(''), type(0), type(0.0), type([]), type(())):
- if not printable(x):
- print '#',
- print name, '=', `x`
- elif t == type(sys):
- print 'import', name, '#', x
- else:
- print '#', name, '=', x
-
-# check if a value is printable in a way that can be read back with input()
-#
-def printable(x):
- t = type(x)
- if t in (type(''), type(0), type(0.0)):
- return 1
- if t in (type([]), type(())):
- for item in x:
- if not printable(item):
- return 0
- return 1
- if x == {}:
- return 1
- return 0
+++ /dev/null
-import fnmatch
-import os
-
-_debug = 0
-
-_prune = ['(*)']
-
-def find(pattern, dir = os.curdir):
- list = []
- names = os.listdir(dir)
- names.sort()
- for name in names:
- if name in (os.curdir, os.pardir):
- continue
- fullname = os.path.join(dir, name)
- if fnmatch.fnmatch(name, pattern):
- list.append(fullname)
- if os.path.isdir(fullname) and not os.path.islink(fullname):
- for p in _prune:
- if fnmatch.fnmatch(name, p):
- if _debug: print "skip", `fullname`
- break
- else:
- if _debug: print "descend into", `fullname`
- list = list + find(pattern, fullname)
- return list
+++ /dev/null
-# Text formatting abstractions
-# Note -- this module is obsolete, it's too slow anyway
-
-
-import string
-import Para
-
-
-# A formatter back-end object has one method that is called by the formatter:
-# addpara(p), where p is a paragraph object. For example:
-
-
-# Formatter back-end to do nothing at all with the paragraphs
-class NullBackEnd:
- #
- def __init__(self):
- pass
- #
- def addpara(self, p):
- pass
- #
- def bgn_anchor(self, id):
- pass
- #
- def end_anchor(self, id):
- pass
-
-
-# Formatter back-end to collect the paragraphs in a list
-class SavingBackEnd(NullBackEnd):
- #
- def __init__(self):
- self.paralist = []
- #
- def addpara(self, p):
- self.paralist.append(p)
- #
- def hitcheck(self, h, v):
- hits = []
- for p in self.paralist:
- if p.top <= v <= p.bottom:
- for id in p.hitcheck(h, v):
- if id not in hits:
- hits.append(id)
- return hits
- #
- def extract(self):
- text = ''
- for p in self.paralist:
- text = text + (p.extract())
- return text
- #
- def extractpart(self, long1, long2):
- if long1 > long2: long1, long2 = long2, long1
- para1, pos1 = long1
- para2, pos2 = long2
- text = ''
- while para1 < para2:
- ptext = self.paralist[para1].extract()
- text = text + ptext[pos1:]
- pos1 = 0
- para1 = para1 + 1
- ptext = self.paralist[para2].extract()
- return text + ptext[pos1:pos2]
- #
- def whereis(self, d, h, v):
- total = 0
- for i in range(len(self.paralist)):
- p = self.paralist[i]
- result = p.whereis(d, h, v)
- if result is not None:
- return i, result
- return None
- #
- def roundtowords(self, long1, long2):
- i, offset = long1
- text = self.paralist[i].extract()
- while offset > 0 and text[offset-1] != ' ': offset = offset-1
- long1 = i, offset
- #
- i, offset = long2
- text = self.paralist[i].extract()
- n = len(text)
- while offset < n-1 and text[offset] != ' ': offset = offset+1
- long2 = i, offset
- #
- return long1, long2
- #
- def roundtoparagraphs(self, long1, long2):
- long1 = long1[0], 0
- long2 = long2[0], len(self.paralist[long2[0]].extract())
- return long1, long2
-
-
-# Formatter back-end to send the text directly to the drawing object
-class WritingBackEnd(NullBackEnd):
- #
- def __init__(self, d, width):
- self.d = d
- self.width = width
- self.lineno = 0
- #
- def addpara(self, p):
- self.lineno = p.render(self.d, 0, self.lineno, self.width)
-
-
-# A formatter receives a stream of formatting instructions and assembles
-# these into a stream of paragraphs on to a back-end. The assembly is
-# parametrized by a text measurement object, which must match the output
-# operations of the back-end. The back-end is responsible for splitting
-# paragraphs up in lines of a given maximum width. (This is done because
-# in a windowing environment, when the window size changes, there is no
-# need to redo the assembly into paragraphs, but the splitting into lines
-# must be done taking the new window size into account.)
-
-
-# Formatter base class. Initialize it with a text measurement object,
-# which is used for text measurements, and a back-end object,
-# which receives the completed paragraphs. The formatting methods are:
-# setfont(font)
-# setleftindent(nspaces)
-# setjust(type) where type is 'l', 'c', 'r', or 'lr'
-# flush()
-# vspace(nlines)
-# needvspace(nlines)
-# addword(word, nspaces)
-class BaseFormatter:
- #
- def __init__(self, d, b):
- # Drawing object used for text measurements
- self.d = d
- #
- # BackEnd object receiving completed paragraphs
- self.b = b
- #
- # Parameters of the formatting model
- self.leftindent = 0
- self.just = 'l'
- self.font = None
- self.blanklines = 0
- #
- # Parameters derived from the current font
- self.space = d.textwidth(' ')
- self.line = d.lineheight()
- self.ascent = d.baseline()
- self.descent = self.line - self.ascent
- #
- # Parameter derived from the default font
- self.n_space = self.space
- #
- # Current paragraph being built
- self.para = None
- self.nospace = 1
- #
- # Font to set on the next word
- self.nextfont = None
- #
- def newpara(self):
- return Para.Para()
- #
- def setfont(self, font):
- if font is None: return
- self.font = self.nextfont = font
- d = self.d
- d.setfont(font)
- self.space = d.textwidth(' ')
- self.line = d.lineheight()
- self.ascent = d.baseline()
- self.descent = self.line - self.ascent
- #
- def setleftindent(self, nspaces):
- self.leftindent = int(self.n_space * nspaces)
- if self.para:
- hang = self.leftindent - self.para.indent_left
- if hang > 0 and self.para.getlength() <= hang:
- self.para.makehangingtag(hang)
- self.nospace = 1
- else:
- self.flush()
- #
- def setrightindent(self, nspaces):
- self.rightindent = int(self.n_space * nspaces)
- if self.para:
- self.para.indent_right = self.rightindent
- self.flush()
- #
- def setjust(self, just):
- self.just = just
- if self.para:
- self.para.just = self.just
- #
- def flush(self):
- if self.para:
- self.b.addpara(self.para)
- self.para = None
- if self.font is not None:
- self.d.setfont(self.font)
- self.nospace = 1
- #
- def vspace(self, nlines):
- self.flush()
- if nlines > 0:
- self.para = self.newpara()
- tuple = None, '', 0, 0, 0, int(nlines*self.line), 0
- self.para.words.append(tuple)
- self.flush()
- self.blanklines = self.blanklines + nlines
- #
- def needvspace(self, nlines):
- self.flush() # Just to be sure
- if nlines > self.blanklines:
- self.vspace(nlines - self.blanklines)
- #
- def addword(self, text, space):
- if self.nospace and not text:
- return
- self.nospace = 0
- self.blanklines = 0
- if not self.para:
- self.para = self.newpara()
- self.para.indent_left = self.leftindent
- self.para.just = self.just
- self.nextfont = self.font
- space = int(space * self.space)
- self.para.words.append((self.nextfont, text,
- self.d.textwidth(text), space, space,
- self.ascent, self.descent))
- self.nextfont = None
- #
- def bgn_anchor(self, id):
- if not self.para:
- self.nospace = 0
- self.addword('', 0)
- self.para.bgn_anchor(id)
- #
- def end_anchor(self, id):
- if not self.para:
- self.nospace = 0
- self.addword('', 0)
- self.para.end_anchor(id)
-
-
-# Measuring object for measuring text as viewed on a tty
-class NullMeasurer:
- #
- def __init__(self):
- pass
- #
- def setfont(self, font):
- pass
- #
- def textwidth(self, text):
- return len(text)
- #
- def lineheight(self):
- return 1
- #
- def baseline(self):
- return 0
-
-
-# Drawing object for writing plain ASCII text to a file
-class FileWriter:
- #
- def __init__(self, fp):
- self.fp = fp
- self.lineno, self.colno = 0, 0
- #
- def setfont(self, font):
- pass
- #
- def text(self, (h, v), str):
- if not str: return
- if '\n' in str:
- raise ValueError, 'can\'t write \\n'
- while self.lineno < v:
- self.fp.write('\n')
- self.colno, self.lineno = 0, self.lineno + 1
- while self.lineno > v:
- # XXX This should never happen...
- self.fp.write('\033[A') # ANSI up arrow
- self.lineno = self.lineno - 1
- if self.colno < h:
- self.fp.write(' ' * (h - self.colno))
- elif self.colno > h:
- self.fp.write('\b' * (self.colno - h))
- self.colno = h
- self.fp.write(str)
- self.colno = h + len(str)
-
-
-# Formatting class to do nothing at all with the data
-class NullFormatter(BaseFormatter):
- #
- def __init__(self):
- d = NullMeasurer()
- b = NullBackEnd()
- BaseFormatter.__init__(self, d, b)
-
-
-# Formatting class to write directly to a file
-class WritingFormatter(BaseFormatter):
- #
- def __init__(self, fp, width):
- dm = NullMeasurer()
- dw = FileWriter(fp)
- b = WritingBackEnd(dw, width)
- BaseFormatter.__init__(self, dm, b)
- self.blanklines = 1
- #
- # Suppress multiple blank lines
- def needvspace(self, nlines):
- BaseFormatter.needvspace(self, min(1, nlines))
-
-
-# A "FunnyFormatter" writes ASCII text with a twist: *bold words*,
-# _italic text_ and _underlined words_, and `quoted text'.
-# It assumes that the fonts are 'r', 'i', 'b', 'u', 'q': (roman,
-# italic, bold, underline, quote).
-# Moreover, if the font is in upper case, the text is converted to
-# UPPER CASE.
-class FunnyFormatter(WritingFormatter):
- #
- def flush(self):
- if self.para: finalize(self.para)
- WritingFormatter.flush(self)
-
-
-# Surrounds *bold words* and _italic text_ in a paragraph with
-# appropriate markers, fixing the size (assuming these characters'
-# width is 1).
-openchar = \
- {'b':'*', 'i':'_', 'u':'_', 'q':'`', 'B':'*', 'I':'_', 'U':'_', 'Q':'`'}
-closechar = \
- {'b':'*', 'i':'_', 'u':'_', 'q':'\'', 'B':'*', 'I':'_', 'U':'_', 'Q':'\''}
-def finalize(para):
- oldfont = curfont = 'r'
- para.words.append(('r', '', 0, 0, 0, 0)) # temporary, deleted at end
- for i in range(len(para.words)):
- fo, te, wi = para.words[i][:3]
- if fo is not None: curfont = fo
- if curfont != oldfont:
- if closechar.has_key(oldfont):
- c = closechar[oldfont]
- j = i-1
- while j > 0 and para.words[j][1] == '': j = j-1
- fo1, te1, wi1 = para.words[j][:3]
- te1 = te1 + c
- wi1 = wi1 + len(c)
- para.words[j] = (fo1, te1, wi1) + \
- para.words[j][3:]
- if openchar.has_key(curfont) and te:
- c = openchar[curfont]
- te = c + te
- wi = len(c) + wi
- para.words[i] = (fo, te, wi) + \
- para.words[i][3:]
- if te: oldfont = curfont
- else: oldfont = 'r'
- if curfont in string.uppercase:
- te = string.upper(te)
- para.words[i] = (fo, te, wi) + para.words[i][3:]
- del para.words[-1]
-
-
-# Formatter back-end to draw the text in a window.
-# This has an option to draw while the paragraphs are being added,
-# to minimize the delay before the user sees anything.
-# This manages the entire "document" of the window.
-class StdwinBackEnd(SavingBackEnd):
- #
- def __init__(self, window, drawnow):
- self.window = window
- self.drawnow = drawnow
- self.width = window.getwinsize()[0]
- self.selection = None
- self.height = 0
- window.setorigin(0, 0)
- window.setdocsize(0, 0)
- self.d = window.begindrawing()
- SavingBackEnd.__init__(self)
- #
- def finish(self):
- self.d.close()
- self.d = None
- self.window.setdocsize(0, self.height)
- #
- def addpara(self, p):
- self.paralist.append(p)
- if self.drawnow:
- self.height = \
- p.render(self.d, 0, self.height, self.width)
- else:
- p.layout(self.width)
- p.left = 0
- p.top = self.height
- p.right = self.width
- p.bottom = self.height + p.height
- self.height = p.bottom
- #
- def resize(self):
- self.window.change((0, 0), (self.width, self.height))
- self.width = self.window.getwinsize()[0]
- self.height = 0
- for p in self.paralist:
- p.layout(self.width)
- p.left = 0
- p.top = self.height
- p.right = self.width
- p.bottom = self.height + p.height
- self.height = p.bottom
- self.window.change((0, 0), (self.width, self.height))
- self.window.setdocsize(0, self.height)
- #
- def redraw(self, area):
- d = self.window.begindrawing()
- (left, top), (right, bottom) = area
- d.erase(area)
- d.cliprect(area)
- for p in self.paralist:
- if top < p.bottom and p.top < bottom:
- v = p.render(d, p.left, p.top, p.right)
- if self.selection:
- self.invert(d, self.selection)
- d.close()
- #
- def setselection(self, new):
- if new:
- long1, long2 = new
- pos1 = long1[:3]
- pos2 = long2[:3]
- new = pos1, pos2
- if new != self.selection:
- d = self.window.begindrawing()
- if self.selection:
- self.invert(d, self.selection)
- if new:
- self.invert(d, new)
- d.close()
- self.selection = new
- #
- def getselection(self):
- return self.selection
- #
- def extractselection(self):
- if self.selection:
- a, b = self.selection
- return self.extractpart(a, b)
- else:
- return None
- #
- def invert(self, d, region):
- long1, long2 = region
- if long1 > long2: long1, long2 = long2, long1
- para1, pos1 = long1
- para2, pos2 = long2
- while para1 < para2:
- self.paralist[para1].invert(d, pos1, None)
- pos1 = None
- para1 = para1 + 1
- self.paralist[para2].invert(d, pos1, pos2)
- #
- def search(self, prog):
- import re, string
- if type(prog) is type(''):
- prog = re.compile(string.lower(prog))
- if self.selection:
- iold = self.selection[0][0]
- else:
- iold = -1
- hit = None
- for i in range(len(self.paralist)):
- if i == iold or i < iold and hit:
- continue
- p = self.paralist[i]
- text = string.lower(p.extract())
- match = prog.search(text)
- if match:
- a, b = match.group(0)
- long1 = i, a
- long2 = i, b
- hit = long1, long2
- if i > iold:
- break
- if hit:
- self.setselection(hit)
- i = hit[0][0]
- p = self.paralist[i]
- self.window.show((p.left, p.top), (p.right, p.bottom))
- return 1
- else:
- return 0
- #
- def showanchor(self, id):
- for i in range(len(self.paralist)):
- p = self.paralist[i]
- if p.hasanchor(id):
- long1 = i, 0
- long2 = i, len(p.extract())
- hit = long1, long2
- self.setselection(hit)
- self.window.show(
- (p.left, p.top), (p.right, p.bottom))
- break
-
-
-# GL extensions
-
-class GLFontCache:
- #
- def __init__(self):
- self.reset()
- self.setfont('')
- #
- def reset(self):
- self.fontkey = None
- self.fonthandle = None
- self.fontinfo = None
- self.fontcache = {}
- #
- def close(self):
- self.reset()
- #
- def setfont(self, fontkey):
- if fontkey == '':
- fontkey = 'Times-Roman 12'
- elif ' ' not in fontkey:
- fontkey = fontkey + ' 12'
- if fontkey == self.fontkey:
- return
- if self.fontcache.has_key(fontkey):
- handle = self.fontcache[fontkey]
- else:
- import string
- i = string.index(fontkey, ' ')
- name, sizestr = fontkey[:i], fontkey[i:]
- size = eval(sizestr)
- key1 = name + ' 1'
- key = name + ' ' + `size`
- # NB key may differ from fontkey!
- if self.fontcache.has_key(key):
- handle = self.fontcache[key]
- else:
- if self.fontcache.has_key(key1):
- handle = self.fontcache[key1]
- else:
- import fm
- handle = fm.findfont(name)
- self.fontcache[key1] = handle
- handle = handle.scalefont(size)
- self.fontcache[fontkey] = \
- self.fontcache[key] = handle
- self.fontkey = fontkey
- if self.fonthandle != handle:
- self.fonthandle = handle
- self.fontinfo = handle.getfontinfo()
- handle.setfont()
-
-
-class GLMeasurer(GLFontCache):
- #
- def textwidth(self, text):
- return self.fonthandle.getstrwidth(text)
- #
- def baseline(self):
- return self.fontinfo[6] - self.fontinfo[3]
- #
- def lineheight(self):
- return self.fontinfo[6]
-
-
-class GLWriter(GLFontCache):
- #
- # NOTES:
- # (1) Use gl.ortho2 to use X pixel coordinates!
- #
- def text(self, (h, v), text):
- import gl, fm
- gl.cmov2i(h, v + self.fontinfo[6] - self.fontinfo[3])
- fm.prstr(text)
- #
- def setfont(self, fontkey):
- oldhandle = self.fonthandle
- GLFontCache.setfont(fontkey)
- if self.fonthandle != oldhandle:
- handle.setfont()
-
-
-class GLMeasurerWriter(GLMeasurer, GLWriter):
- pass
-
-
-class GLBackEnd(SavingBackEnd):
- #
- def __init__(self, wid):
- import gl
- gl.winset(wid)
- self.wid = wid
- self.width = gl.getsize()[1]
- self.height = 0
- self.d = GLMeasurerWriter()
- SavingBackEnd.__init__(self)
- #
- def finish(self):
- pass
- #
- def addpara(self, p):
- self.paralist.append(p)
- self.height = p.render(self.d, 0, self.height, self.width)
- #
- def redraw(self):
- import gl
- gl.winset(self.wid)
- width = gl.getsize()[1]
- if width != self.width:
- setdocsize = 1
- self.width = width
- for p in self.paralist:
- p.top = p.bottom = None
- d = self.d
- v = 0
- for p in self.paralist:
- v = p.render(d, 0, v, width)
+++ /dev/null
-# 'grep'
-
-import regex
-from regex_syntax import *
-
-opt_show_where = 0
-opt_show_filename = 0
-opt_show_lineno = 1
-
-def grep(pat, *files):
- return ggrep(RE_SYNTAX_GREP, pat, files)
-
-def egrep(pat, *files):
- return ggrep(RE_SYNTAX_EGREP, pat, files)
-
-def emgrep(pat, *files):
- return ggrep(RE_SYNTAX_EMACS, pat, files)
-
-def ggrep(syntax, pat, files):
- if len(files) == 1 and type(files[0]) == type([]):
- files = files[0]
- global opt_show_filename
- opt_show_filename = (len(files) != 1)
- syntax = regex.set_syntax(syntax)
- try:
- prog = regex.compile(pat)
- finally:
- syntax = regex.set_syntax(syntax)
- for filename in files:
- fp = open(filename, 'r')
- lineno = 0
- while 1:
- line = fp.readline()
- if not line: break
- lineno = lineno + 1
- if prog.search(line) >= 0:
- showline(filename, lineno, line, prog)
- fp.close()
-
-def pgrep(pat, *files):
- if len(files) == 1 and type(files[0]) == type([]):
- files = files[0]
- global opt_show_filename
- opt_show_filename = (len(files) != 1)
- import re
- prog = re.compile(pat)
- for filename in files:
- fp = open(filename, 'r')
- lineno = 0
- while 1:
- line = fp.readline()
- if not line: break
- lineno = lineno + 1
- if prog.search(line):
- showline(filename, lineno, line, prog)
- fp.close()
-
-def showline(filename, lineno, line, prog):
- if line[-1:] == '\n': line = line[:-1]
- if opt_show_lineno:
- prefix = `lineno`.rjust(3) + ': '
- else:
- prefix = ''
- if opt_show_filename:
- prefix = filename + ': ' + prefix
- print prefix + line
- if opt_show_where:
- start, end = prog.regs()[0]
- line = line[:start]
- if '\t' not in line:
- prefix = ' ' * (len(prefix) + start)
- else:
- prefix = ' ' * len(prefix)
- for c in line:
- if c != '\t': c = ' '
- prefix = prefix + c
- if start == end: prefix = prefix + '\\'
- else: prefix = prefix + '^'*(end-start)
- print prefix
+++ /dev/null
-import struct, fcntl
-
-def writelock(f):
- _lock(f, fcntl.F_WRLCK)
-
-def readlock(f):
- _lock(f, fcntl.F_RDLCK)
-
-def unlock(f):
- _lock(f, fcntl.F_UNLCK)
-
-def _lock(f, op):
- dummy = fcntl.fcntl(f.fileno(), fcntl.F_SETLKW,
- struct.pack('2h8l', op,
- 0, 0, 0, 0, 0, 0, 0, 0, 0))
+++ /dev/null
-# New dir() function
-
-
-# This should be the new dir(), except that it should still list
-# the current local name space by default
-
-def listattrs(x):
- try:
- dictkeys = x.__dict__.keys()
- except (AttributeError, TypeError):
- dictkeys = []
- #
- try:
- methods = x.__methods__
- except (AttributeError, TypeError):
- methods = []
- #
- try:
- members = x.__members__
- except (AttributeError, TypeError):
- members = []
- #
- try:
- the_class = x.__class__
- except (AttributeError, TypeError):
- the_class = None
- #
- try:
- bases = x.__bases__
- except (AttributeError, TypeError):
- bases = ()
- #
- total = dictkeys + methods + members
- if the_class:
- # It's a class instace; add the class's attributes
- # that are functions (methods)...
- class_attrs = listattrs(the_class)
- class_methods = []
- for name in class_attrs:
- if is_function(getattr(the_class, name)):
- class_methods.append(name)
- total = total + class_methods
- elif bases:
- # It's a derived class; add the base class attributes
- for base in bases:
- base_attrs = listattrs(base)
- total = total + base_attrs
- total.sort()
- return total
- i = 0
- while i+1 < len(total):
- if total[i] == total[i+1]:
- del total[i+1]
- else:
- i = i+1
- return total
-
-
-# Helper to recognize functions
-
-def is_function(x):
- return type(x) == type(is_function)
-
-
-# Approximation of builtin dir(); but note that this lists the user's
-# variables by default, not the current local name space.
-
-def dir(x = None):
- if x is not None:
- return listattrs(x)
- else:
- import __main__
- return listattrs(__main__)
+++ /dev/null
-"""New import scheme with package support.
-
-Quick Reference
----------------
-
-- To enable package support, execute "import ni" before importing any
- packages. Importing this module automatically installs the relevant
- import hooks.
-
-- To create a package named spam containing sub-modules ham, bacon and
- eggs, create a directory spam somewhere on Python's module search
- path (i.e. spam's parent directory must be one of the directories in
- sys.path or $PYTHONPATH); then create files ham.py, bacon.py and
- eggs.py inside spam.
-
-- To import module ham from package spam and use function hamneggs()
- from that module, you can either do
-
- import spam.ham # *not* "import spam" !!!
- spam.ham.hamneggs()
-
- or
-
- from spam import ham
- ham.hamneggs()
-
- or
-
- from spam.ham import hamneggs
- hamneggs()
-
-- Importing just "spam" does not do what you expect: it creates an
- empty package named spam if one does not already exist, but it does
- not import spam's submodules. The only submodule that is guaranteed
- to be imported is spam.__init__, if it exists. Note that
- spam.__init__ is a submodule of package spam. It can reference to
- spam's namespace via the '__.' prefix, for instance
-
- __.spam_inited = 1 # Set a package-level variable
-
-
-
-Theory of Operation
--------------------
-
-A Package is a module that can contain other modules. Packages can be
-nested. Package introduce dotted names for modules, like P.Q.M, which
-could correspond to a file P/Q/M.py found somewhere on sys.path. It
-is possible to import a package itself, though this makes little sense
-unless the package contains a module called __init__.
-
-A package has two variables that control the namespace used for
-packages and modules, both initialized to sensible defaults the first
-time the package is referenced.
-
-(1) A package's *module search path*, contained in the per-package
-variable __path__, defines a list of *directories* where submodules or
-subpackages of the package are searched. It is initialized to the
-directory containing the package. Setting this variable to None makes
-the module search path default to sys.path (this is not quite the same
-as setting it to sys.path, since the latter won't track later
-assignments to sys.path).
-
-(2) A package's *import domain*, contained in the per-package variable
-__domain__, defines a list of *packages* that are searched (using
-their respective module search paths) to satisfy imports. It is
-initialized to the list consisting of the package itself, its parent
-package, its parent's parent, and so on, ending with the root package
-(the nameless package containing all top-level packages and modules,
-whose module search path is None, implying sys.path).
-
-The default domain implements a search algorithm called "expanding
-search". An alternative search algorithm called "explicit search"
-fixes the import search path to contain only the root package,
-requiring the modules in the package to name all imported modules by
-their full name. The convention of using '__' to refer to the current
-package (both as a per-module variable and in module names) can be
-used by packages using explicit search to refer to modules in the same
-package; this combination is known as "explicit-relative search".
-
-The PackageImporter and PackageLoader classes together implement the
-following policies:
-
-- There is a root package, whose name is ''. It cannot be imported
- directly but may be referenced, e.g. by using '__' from a top-level
- module.
-
-- In each module or package, the variable '__' contains a reference to
- the parent package; in the root package, '__' points to itself.
-
-- In the name for imported modules (e.g. M in "import M" or "from M
- import ..."), a leading '__' refers to the current package (i.e.
- the package containing the current module); leading '__.__' and so
- on refer to the current package's parent, and so on. The use of
- '__' elsewhere in the module name is not supported.
-
-- Modules are searched using the "expanding search" algorithm by
- virtue of the default value for __domain__.
-
-- If A.B.C is imported, A is searched using __domain__; then
- subpackage B is searched in A using its __path__, and so on.
-
-- Built-in modules have priority: even if a file sys.py exists in a
- package, "import sys" imports the built-in sys module.
-
-- The same holds for frozen modules, for better or for worse.
-
-- Submodules and subpackages are not automatically loaded when their
- parent packages is loaded.
-
-- The construct "from package import *" is illegal. (It can still be
- used to import names from a module.)
-
-- When "from package import module1, module2, ..." is used, those
- modules are explicitly loaded.
-
-- When a package is loaded, if it has a submodule __init__, that
- module is loaded. This is the place where required submodules can
- be loaded, the __path__ variable extended, etc. The __init__ module
- is loaded even if the package was loaded only in order to create a
- stub for a sub-package: if "import P.Q.R" is the first reference to
- P, and P has a submodule __init__, P.__init__ is loaded before P.Q
- is even searched.
-
-Caveats:
-
-- It is possible to import a package that has no __init__ submodule;
- this is not particularly useful but there may be useful applications
- for it (e.g. to manipulate its search paths from the outside!).
-
-- There are no special provisions for os.chdir(). If you plan to use
- os.chdir() before you have imported all your modules, it is better
- not to have relative pathnames in sys.path. (This could actually be
- fixed by changing the implementation of path_join() in the hook to
- absolutize paths.)
-
-- Packages and modules are introduced in sys.modules as soon as their
- loading is started. When the loading is terminated by an exception,
- the sys.modules entries remain around.
-
-- There are no special measures to support mutually recursive modules,
- but it will work under the same conditions where it works in the
- flat module space system.
-
-- Sometimes dummy entries (whose value is None) are entered in
- sys.modules, to indicate that a particular module does not exist --
- this is done to speed up the expanding search algorithm when a
- module residing at a higher level is repeatedly imported (Python
- promises that importing a previously imported module is cheap!)
-
-- Although dynamically loaded extensions are allowed inside packages,
- the current implementation (hardcoded in the interpreter) of their
- initialization may cause problems if an extension invokes the
- interpreter during its initialization.
-
-- reload() may find another version of the module only if it occurs on
- the package search path. Thus, it keeps the connection to the
- package to which the module belongs, but may find a different file.
-
-XXX Need to have an explicit name for '', e.g. '__root__'.
-
-"""
-
-
-import imp
-import sys
-import __builtin__
-
-import ihooks
-from ihooks import ModuleLoader, ModuleImporter
-
-
-class PackageLoader(ModuleLoader):
-
- """A subclass of ModuleLoader with package support.
-
- find_module_in_dir() will succeed if there's a subdirectory with
- the given name; load_module() will create a stub for a package and
- load its __init__ module if it exists.
-
- """
-
- def find_module_in_dir(self, name, dir):
- if dir is not None:
- dirname = self.hooks.path_join(dir, name)
- if self.hooks.path_isdir(dirname):
- return None, dirname, ('', '', 'PACKAGE')
- return ModuleLoader.find_module_in_dir(self, name, dir)
-
- def load_module(self, name, stuff):
- file, filename, info = stuff
- suff, mode, type = info
- if type == 'PACKAGE':
- return self.load_package(name, stuff)
- if sys.modules.has_key(name):
- m = sys.modules[name]
- else:
- sys.modules[name] = m = imp.new_module(name)
- self.set_parent(m)
- if type == imp.C_EXTENSION and '.' in name:
- return self.load_dynamic(name, stuff)
- else:
- return ModuleLoader.load_module(self, name, stuff)
-
- def load_dynamic(self, name, stuff):
- file, filename, (suff, mode, type) = stuff
- # Hack around restriction in imp.load_dynamic()
- i = name.rfind('.')
- tail = name[i+1:]
- if sys.modules.has_key(tail):
- save = sys.modules[tail]
- else:
- save = None
- sys.modules[tail] = imp.new_module(name)
- try:
- m = imp.load_dynamic(tail, filename, file)
- finally:
- if save:
- sys.modules[tail] = save
- else:
- del sys.modules[tail]
- sys.modules[name] = m
- return m
-
- def load_package(self, name, stuff):
- file, filename, info = stuff
- if sys.modules.has_key(name):
- package = sys.modules[name]
- else:
- sys.modules[name] = package = imp.new_module(name)
- package.__path__ = [filename]
- self.init_package(package)
- return package
-
- def init_package(self, package):
- self.set_parent(package)
- self.set_domain(package)
- self.call_init_module(package)
-
- def set_parent(self, m):
- name = m.__name__
- if '.' in name:
- name = name[:name.rfind('.')]
- else:
- name = ''
- m.__ = sys.modules[name]
-
- def set_domain(self, package):
- name = package.__name__
- package.__domain__ = domain = [name]
- while '.' in name:
- name = name[:name.rfind('.')]
- domain.append(name)
- if name:
- domain.append('')
-
- def call_init_module(self, package):
- stuff = self.find_module('__init__', package.__path__)
- if stuff:
- m = self.load_module(package.__name__ + '.__init__', stuff)
- package.__init__ = m
-
-
-class PackageImporter(ModuleImporter):
-
- """Importer that understands packages and '__'."""
-
- def __init__(self, loader = None, verbose = 0):
- ModuleImporter.__init__(self,
- loader or PackageLoader(None, verbose), verbose)
-
- def import_module(self, name, globals={}, locals={}, fromlist=[]):
- if globals.has_key('__'):
- package = globals['__']
- else:
- # No calling context, assume in root package
- package = sys.modules['']
- if name[:3] in ('__.', '__'):
- p = package
- name = name[3:]
- while name[:3] in ('__.', '__'):
- p = p.__
- name = name[3:]
- if not name:
- return self.finish(package, p, '', fromlist)
- if '.' in name:
- i = name.find('.')
- name, tail = name[:i], name[i:]
- else:
- tail = ''
- mname = p.__name__ and p.__name__+'.'+name or name
- m = self.get1(mname)
- return self.finish(package, m, tail, fromlist)
- if '.' in name:
- i = name.find('.')
- name, tail = name[:i], name[i:]
- else:
- tail = ''
- for pname in package.__domain__:
- mname = pname and pname+'.'+name or name
- m = self.get0(mname)
- if m: break
- else:
- raise ImportError, "No such module %s" % name
- return self.finish(m, m, tail, fromlist)
-
- def finish(self, module, m, tail, fromlist):
- # Got ....A; now get ....A.B.C.D
- yname = m.__name__
- if tail and sys.modules.has_key(yname + tail): # Fast path
- yname, tail = yname + tail, ''
- m = self.get1(yname)
- while tail:
- i = tail.find('.', 1)
- if i > 0:
- head, tail = tail[:i], tail[i:]
- else:
- head, tail = tail, ''
- yname = yname + head
- m = self.get1(yname)
-
- # Got ....A.B.C.D; now finalize things depending on fromlist
- if not fromlist:
- return module
- if '__' in fromlist:
- raise ImportError, "Can't import __ from anywhere"
- if not hasattr(m, '__path__'): return m
- if '*' in fromlist:
- raise ImportError, "Can't import * from a package"
- for f in fromlist:
- if hasattr(m, f): continue
- fname = yname + '.' + f
- self.get1(fname)
- return m
-
- def get1(self, name):
- m = self.get(name)
- if not m:
- raise ImportError, "No module named %s" % name
- return m
-
- def get0(self, name):
- m = self.get(name)
- if not m:
- sys.modules[name] = None
- return m
-
- def get(self, name):
- # Internal routine to get or load a module when its parent exists
- if sys.modules.has_key(name):
- return sys.modules[name]
- if '.' in name:
- i = name.rfind('.')
- head, tail = name[:i], name[i+1:]
- else:
- head, tail = '', name
- path = sys.modules[head].__path__
- stuff = self.loader.find_module(tail, path)
- if not stuff:
- return None
- sys.modules[name] = m = self.loader.load_module(name, stuff)
- if head:
- setattr(sys.modules[head], tail, m)
- return m
-
- def reload(self, module):
- name = module.__name__
- if '.' in name:
- i = name.rfind('.')
- head, tail = name[:i], name[i+1:]
- path = sys.modules[head].__path__
- else:
- tail = name
- path = sys.modules[''].__path__
- stuff = self.loader.find_module(tail, path)
- if not stuff:
- raise ImportError, "No module named %s" % name
- return self.loader.load_module(name, stuff)
-
- def unload(self, module):
- if hasattr(module, '__path__'):
- raise ImportError, "don't know how to unload packages yet"
- PackageImporter.unload(self, module)
-
- def install(self):
- if not sys.modules.has_key(''):
- sys.modules[''] = package = imp.new_module('')
- package.__path__ = None
- self.loader.init_package(package)
- for m in sys.modules.values():
- if not m: continue
- if not hasattr(m, '__'):
- self.loader.set_parent(m)
- ModuleImporter.install(self)
-
-
-def install(v = 0):
- ihooks.install(PackageImporter(None, v))
-
-def uninstall():
- ihooks.uninstall()
-
-def ni(v = 0):
- install(v)
-
-def no():
- uninstall()
-
-def test():
- import pdb
- try:
- testproper()
- except:
- sys.last_type, sys.last_value, sys.last_traceback = sys.exc_info()
- print
- print sys.last_type, ':', sys.last_value
- print
- pdb.pm()
-
-def testproper():
- install(1)
- try:
- import mactest
- print dir(mactest)
- raw_input('OK?')
- finally:
- uninstall()
-
-
-if __name__ == '__main__':
- test()
-else:
- install()
+++ /dev/null
-# Module 'packmail' -- create a self-unpacking shell archive.
-
-# This module works on UNIX and on the Mac; the archives can unpack
-# themselves only on UNIX.
-
-import os
-from stat import ST_MTIME
-
-# Print help
-def help():
- print 'All fns have a file open for writing as first parameter'
- print 'pack(f, fullname, name): pack fullname as name'
- print 'packsome(f, directory, namelist): selected files from directory'
- print 'packall(f, directory): pack all files from directory'
- print 'packnotolder(f, directory, name): pack all files from directory'
- print ' that are not older than a file there'
- print 'packtree(f, directory): pack entire directory tree'
-
-# Pack one file
-def pack(outfp, file, name):
- fp = open(file, 'r')
- outfp.write('echo ' + name + '\n')
- outfp.write('sed "s/^X//" >"' + name + '" <<"!"\n')
- while 1:
- line = fp.readline()
- if not line: break
- if line[-1:] != '\n':
- line = line + '\n'
- outfp.write('X' + line)
- outfp.write('!\n')
- fp.close()
-
-# Pack some files from a directory
-def packsome(outfp, dirname, names):
- for name in names:
- print name
- file = os.path.join(dirname, name)
- pack(outfp, file, name)
-
-# Pack all files from a directory
-def packall(outfp, dirname):
- names = os.listdir(dirname)
- try:
- names.remove('.')
- except:
- pass
- try:
- names.remove('..')
- except:
- pass
- names.sort()
- packsome(outfp, dirname, names)
-
-# Pack all files from a directory that are not older than a give one
-def packnotolder(outfp, dirname, oldest):
- names = os.listdir(dirname)
- try:
- names.remove('.')
- except:
- pass
- try:
- names.remove('..')
- except:
- pass
- oldest = os.path.join(dirname, oldest)
- st = os.stat(oldest)
- mtime = st[ST_MTIME]
- todo = []
- for name in names:
- print name, '...',
- st = os.stat(os.path.join(dirname, name))
- if st[ST_MTIME] >= mtime:
- print 'Yes.'
- todo.append(name)
- else:
- print 'No.'
- todo.sort()
- packsome(outfp, dirname, todo)
-
-# Pack a whole tree (no exceptions)
-def packtree(outfp, dirname):
- print 'packtree', dirname
- outfp.write('mkdir ' + unixfix(dirname) + '\n')
- names = os.listdir(dirname)
- try:
- names.remove('.')
- except:
- pass
- try:
- names.remove('..')
- except:
- pass
- subdirs = []
- for name in names:
- fullname = os.path.join(dirname, name)
- if os.path.isdir(fullname):
- subdirs.append(fullname)
- else:
- print 'pack', fullname
- pack(outfp, fullname, unixfix(fullname))
- for subdirname in subdirs:
- packtree(outfp, subdirname)
-
-def unixfix(name):
- comps = name.split(os.sep)
- res = ''
- for comp in comps:
- if comp:
- if res: res = res + '/'
- res = res + comp
- return res
+++ /dev/null
-# module 'poly' -- Polynomials
-
-# A polynomial is represented by a list of coefficients, e.g.,
-# [1, 10, 5] represents 1*x**0 + 10*x**1 + 5*x**2 (or 1 + 10x + 5x**2).
-# There is no way to suppress internal zeros; trailing zeros are
-# taken out by normalize().
-
-def normalize(p): # Strip unnecessary zero coefficients
- n = len(p)
- while n:
- if p[n-1]: return p[:n]
- n = n-1
- return []
-
-def plus(a, b):
- if len(a) < len(b): a, b = b, a # make sure a is the longest
- res = a[:] # make a copy
- for i in range(len(b)):
- res[i] = res[i] + b[i]
- return normalize(res)
-
-def minus(a, b):
- neg_b = map(lambda x: -x, b[:])
- return plus(a, neg_b)
-
-def one(power, coeff): # Representation of coeff * x**power
- res = []
- for i in range(power): res.append(0)
- return res + [coeff]
-
-def times(a, b):
- res = []
- for i in range(len(a)):
- for j in range(len(b)):
- res = plus(res, one(i+j, a[i]*b[j]))
- return res
-
-def power(a, n): # Raise polynomial a to the positive integral power n
- if n == 0: return [1]
- if n == 1: return a
- if n/2*2 == n:
- b = power(a, n/2)
- return times(b, b)
- return times(power(a, n-1), a)
-
-def der(a): # First derivative
- res = a[1:]
- for i in range(len(res)):
- res[i] = res[i] * (i+1)
- return res
-
-# Computing a primitive function would require rational arithmetic...
+++ /dev/null
-# Module 'rand'
-# Don't use unless you want compatibility with C's rand()!
-
-import whrandom
-
-def srand(seed):
- whrandom.seed(seed%256, seed/256%256, seed/65536%256)
-
-def rand():
- return int(whrandom.random() * 32768.0) % 32768
-
-def choice(seq):
- return seq[rand() % len(seq)]
+++ /dev/null
-"""Maintain a cache of stat() information on files.
-
-There are functions to reset the cache or to selectively remove items.
-"""
-
-import warnings
-warnings.warn("The statcache module is obsolete. Use os.stat() instead.",
- DeprecationWarning)
-del warnings
-
-import os as _os
-from stat import *
-
-__all__ = ["stat","reset","forget","forget_prefix","forget_dir",
- "forget_except_prefix","isdir"]
-
-# The cache. Keys are pathnames, values are os.stat outcomes.
-# Remember that multiple threads may be calling this! So, e.g., that
-# path in cache returns 1 doesn't mean the cache will still contain
-# path on the next line. Code defensively.
-
-cache = {}
-
-def stat(path):
- """Stat a file, possibly out of the cache."""
- ret = cache.get(path, None)
- if ret is None:
- cache[path] = ret = _os.stat(path)
- return ret
-
-def reset():
- """Clear the cache."""
- cache.clear()
-
-# For thread saftey, always use forget() internally too.
-def forget(path):
- """Remove a given item from the cache, if it exists."""
- try:
- del cache[path]
- except KeyError:
- pass
-
-def forget_prefix(prefix):
- """Remove all pathnames with a given prefix."""
- for path in cache.keys():
- if path.startswith(prefix):
- forget(path)
-
-def forget_dir(prefix):
- """Forget a directory and all entries except for entries in subdirs."""
-
- # Remove trailing separator, if any. This is tricky to do in a
- # x-platform way. For example, Windows accepts both / and \ as
- # separators, and if there's nothing *but* a separator we want to
- # preserve that this is the root. Only os.path has the platform
- # knowledge we need.
- from os.path import split, join
- prefix = split(join(prefix, "xxx"))[0]
- forget(prefix)
- for path in cache.keys():
- # First check that the path at least starts with the prefix, so
- # that when it doesn't we can avoid paying for split().
- if path.startswith(prefix) and split(path)[0] == prefix:
- forget(path)
-
-def forget_except_prefix(prefix):
- """Remove all pathnames except with a given prefix.
-
- Normally used with prefix = '/' after a chdir().
- """
-
- for path in cache.keys():
- if not path.startswith(prefix):
- forget(path)
-
-def isdir(path):
- """Return True if directory, else False."""
- try:
- st = stat(path)
- except _os.error:
- return False
- return S_ISDIR(st.st_mode)
+++ /dev/null
-# Print tracebacks, with a dump of local variables.
-# Also an interactive stack trace browser.
-# Note -- this module is obsolete -- use pdb.pm() instead.
-
-import sys
-import os
-from stat import *
-import linecache
-
-def br(): browser(sys.last_traceback)
-
-def tb(): printtb(sys.last_traceback)
-
-def browser(tb):
- if not tb:
- print 'No traceback.'
- return
- tblist = []
- while tb:
- tblist.append(tb)
- tb = tb.tb_next
- ptr = len(tblist)-1
- tb = tblist[ptr]
- while 1:
- if tb != tblist[ptr]:
- tb = tblist[ptr]
- print `ptr` + ':',
- printtbheader(tb)
- try:
- line = raw_input('TB: ')
- except KeyboardInterrupt:
- print '\n[Interrupted]'
- break
- except EOFError:
- print '\n[EOF]'
- break
- cmd = line.strip()
- if cmd:
- if cmd == 'quit':
- break
- elif cmd == 'list':
- browserlist(tb)
- elif cmd == 'up':
- if ptr-1 >= 0: ptr = ptr-1
- else: print 'Bottom of stack.'
- elif cmd == 'down':
- if ptr+1 < len(tblist): ptr = ptr+1
- else: print 'Top of stack.'
- elif cmd == 'locals':
- printsymbols(tb.tb_frame.f_locals)
- elif cmd == 'globals':
- printsymbols(tb.tb_frame.f_globals)
- elif cmd in ('?', 'help'):
- browserhelp()
- else:
- browserexec(tb, cmd)
-
-def browserlist(tb):
- filename = tb.tb_frame.f_code.co_filename
- lineno = tb.tb_lineno
- last = lineno
- first = max(1, last-10)
- for i in range(first, last+1):
- if i == lineno: prefix = '***' + `i`.rjust(4) + ':'
- else: prefix = `i`.rjust(7) + ':'
- line = linecache.getline(filename, i)
- if line[-1:] == '\n': line = line[:-1]
- print prefix + line
-
-def browserexec(tb, cmd):
- locals = tb.tb_frame.f_locals
- globals = tb.tb_frame.f_globals
- try:
- exec cmd+'\n' in globals, locals
- except:
- t, v = sys.exc_info()[:2]
- print '*** Exception:',
- if type(t) is type(''):
- print t,
- else:
- print t.__name__,
- if v is not None:
- print ':', v,
- print
- print 'Type help to get help.'
-
-def browserhelp():
- print
- print ' This is the traceback browser. Commands are:'
- print ' up : move one level up in the call stack'
- print ' down : move one level down in the call stack'
- print ' locals : print all local variables at this level'
- print ' globals : print all global variables at this level'
- print ' list : list source code around the failure'
- print ' help : print help (what you are reading now)'
- print ' quit : back to command interpreter'
- print ' Typing any other 1-line statement will execute it'
- print ' using the current level\'s symbol tables'
- print
-
-def printtb(tb):
- while tb:
- print1tb(tb)
- tb = tb.tb_next
-
-def print1tb(tb):
- printtbheader(tb)
- if tb.tb_frame.f_locals is not tb.tb_frame.f_globals:
- printsymbols(tb.tb_frame.f_locals)
-
-def printtbheader(tb):
- filename = tb.tb_frame.f_code.co_filename
- lineno = tb.tb_lineno
- info = '"' + filename + '"(' + `lineno` + ')'
- line = linecache.getline(filename, lineno)
- if line:
- info = info + ': ' + line.strip()
- print info
-
-def printsymbols(d):
- keys = d.keys()
- keys.sort()
- for name in keys:
- print ' ' + name.ljust(12) + ':',
- printobject(d[name], 4)
- print
-
-def printobject(v, maxlevel):
- if v is None:
- print 'None',
- elif type(v) in (type(0), type(0.0)):
- print v,
- elif type(v) is type(''):
- if len(v) > 20:
- print `v[:17] + '...'`,
- else:
- print `v`,
- elif type(v) is type(()):
- print '(',
- printlist(v, maxlevel)
- print ')',
- elif type(v) is type([]):
- print '[',
- printlist(v, maxlevel)
- print ']',
- elif type(v) is type({}):
- print '{',
- printdict(v, maxlevel)
- print '}',
- else:
- print v,
-
-def printlist(v, maxlevel):
- n = len(v)
- if n == 0: return
- if maxlevel <= 0:
- print '...',
- return
- for i in range(min(6, n)):
- printobject(v[i], maxlevel-1)
- if i+1 < n: print ',',
- if n > 6: print '...',
-
-def printdict(v, maxlevel):
- keys = v.keys()
- n = len(keys)
- if n == 0: return
- if maxlevel <= 0:
- print '...',
- return
- keys.sort()
- for i in range(min(6, n)):
- key = keys[i]
- print `key` + ':',
- printobject(v[key], maxlevel-1)
- if i+1 < n: print ',',
- if n > 6: print '...',
+++ /dev/null
-"""Parse a timezone specification."""
-
-# XXX Unfinished.
-# XXX Only the typical form "XXXhhYYY;ddd/hh,ddd/hh" is currently supported.
-
-import warnings
-warnings.warn(
- "The tzparse module is obsolete and will disappear in the future",
- DeprecationWarning)
-
-tzpat = ('^([A-Z][A-Z][A-Z])([-+]?[0-9]+)([A-Z][A-Z][A-Z]);'
- '([0-9]+)/([0-9]+),([0-9]+)/([0-9]+)$')
-
-tzprog = None
-
-def tzparse(tzstr):
- """Given a timezone spec, return a tuple of information
- (tzname, delta, dstname, daystart, hourstart, dayend, hourend),
- where 'tzname' is the name of the timezone, 'delta' is the offset
- in hours from GMT, 'dstname' is the name of the daylight-saving
- timezone, and 'daystart'/'hourstart' and 'dayend'/'hourend'
- specify the starting and ending points for daylight saving time."""
- global tzprog
- if tzprog is None:
- import re
- tzprog = re.compile(tzpat)
- match = tzprog.match(tzstr)
- if not match:
- raise ValueError, 'not the TZ syntax I understand'
- subs = []
- for i in range(1, 8):
- subs.append(match.group(i))
- for i in (1, 3, 4, 5, 6):
- subs[i] = eval(subs[i])
- [tzname, delta, dstname, daystart, hourstart, dayend, hourend] = subs
- return (tzname, delta, dstname, daystart, hourstart, dayend, hourend)
-
-def tzlocaltime(secs, params):
- """Given a Unix time in seconds and a tuple of information about
- a timezone as returned by tzparse(), return the local time in the
- form (year, month, day, hour, min, sec, yday, wday, tzname)."""
- import time
- (tzname, delta, dstname, daystart, hourstart, dayend, hourend) = params
- year, month, days, hours, mins, secs, yday, wday, isdst = \
- time.gmtime(secs - delta*3600)
- if (daystart, hourstart) <= (yday+1, hours) < (dayend, hourend):
- tzname = dstname
- hours = hours + 1
- return year, month, days, hours, mins, secs, yday, wday, tzname
-
-def tzset():
- """Determine the current timezone from the "TZ" environment variable."""
- global tzparams, timezone, altzone, daylight, tzname
- import os
- tzstr = os.environ['TZ']
- tzparams = tzparse(tzstr)
- timezone = tzparams[1] * 3600
- altzone = timezone - 3600
- daylight = 1
- tzname = tzparams[0], tzparams[2]
-
-def isdst(secs):
- """Return true if daylight-saving time is in effect for the given
- Unix time in the current timezone."""
- import time
- (tzname, delta, dstname, daystart, hourstart, dayend, hourend) = \
- tzparams
- year, month, days, hours, mins, secs, yday, wday, isdst = \
- time.gmtime(secs - delta*3600)
- return (daystart, hourstart) <= (yday+1, hours) < (dayend, hourend)
-
-tzset()
-
-def localtime(secs):
- """Get the local time in the current timezone."""
- return tzlocaltime(secs, tzparams)
-
-def test():
- from time import asctime, gmtime
- import time, sys
- now = time.time()
- x = localtime(now)
- tm = x[:-1] + (0,)
- print 'now =', now, '=', asctime(tm), x[-1]
- now = now - now % (24*3600)
- if sys.argv[1:]: now = now + eval(sys.argv[1])
- x = gmtime(now)
- tm = x[:-1] + (0,)
- print 'gmtime =', now, '=', asctime(tm), 'yday =', x[-2]
- jan1 = now - x[-2]*24*3600
- x = localtime(jan1)
- tm = x[:-1] + (0,)
- print 'jan1 =', jan1, '=', asctime(tm), x[-1]
- for d in range(85, 95) + range(265, 275):
- t = jan1 + d*24*3600
- x = localtime(t)
- tm = x[:-1] + (0,)
- print 'd =', d, 't =', t, '=', asctime(tm), x[-1]
+++ /dev/null
-# Module 'util' -- some useful functions that don't fit elsewhere
-
-# NB: These are now built-in functions, but this module is provided
-# for compatibility. Don't use in new programs unless you need backward
-# compatibility (i.e. need to run with old interpreters).
-
-
-# Remove an item from a list.
-# No complaints if it isn't in the list at all.
-# If it occurs more than once, remove the first occurrence.
-#
-def remove(item, list):
- if item in list: list.remove(item)
-
-
-# Return a string containing a file's contents.
-#
-def readfile(fn):
- return readopenfile(open(fn, 'r'))
-
-
-# Read an open file until EOF.
-#
-def readopenfile(fp):
- return fp.read()
+++ /dev/null
-from sndhdr import *
+++ /dev/null
-"""Wichman-Hill random number generator.
-
-Wichmann, B. A. & Hill, I. D. (1982)
-Algorithm AS 183:
-An efficient and portable pseudo-random number generator
-Applied Statistics 31 (1982) 188-190
-
-see also:
- Correction to Algorithm AS 183
- Applied Statistics 33 (1984) 123
-
- McLeod, A. I. (1985)
- A remark on Algorithm AS 183
- Applied Statistics 34 (1985),198-200
-
-
-USE:
-whrandom.random() yields double precision random numbers
- uniformly distributed between 0 and 1.
-
-whrandom.seed(x, y, z) must be called before whrandom.random()
- to seed the generator
-
-There is also an interface to create multiple independent
-random generators, and to choose from other ranges.
-
-
-
-Multi-threading note: the random number generator used here is not
-thread-safe; it is possible that nearly simultaneous calls in
-different theads return the same random value. To avoid this, you
-have to use a lock around all calls. (I didn't want to slow this
-down in the serial case by using a lock here.)
-"""
-
-import warnings
-warnings.warn("the whrandom module is deprecated; please use the random module",
- DeprecationWarning)
-
-# Translated by Guido van Rossum from C source provided by
-# Adrian Baddeley.
-
-
-class whrandom:
- def __init__(self, x = 0, y = 0, z = 0):
- """Initialize an instance.
- Without arguments, initialize from current time.
- With arguments (x, y, z), initialize from them."""
- self.seed(x, y, z)
-
- def seed(self, x = 0, y = 0, z = 0):
- """Set the seed from (x, y, z).
- These must be integers in the range [0, 256)."""
- if not type(x) == type(y) == type(z) == type(0):
- raise TypeError, 'seeds must be integers'
- if not (0 <= x < 256 and 0 <= y < 256 and 0 <= z < 256):
- raise ValueError, 'seeds must be in range(0, 256)'
- if 0 == x == y == z:
- # Initialize from current time
- import time
- t = long(time.time() * 256)
- t = int((t&0xffffff) ^ (t>>24))
- t, x = divmod(t, 256)
- t, y = divmod(t, 256)
- t, z = divmod(t, 256)
- # Zero is a poor seed, so substitute 1
- self._seed = (x or 1, y or 1, z or 1)
-
- def random(self):
- """Get the next random number in the range [0.0, 1.0)."""
- # This part is thread-unsafe:
- # BEGIN CRITICAL SECTION
- x, y, z = self._seed
- #
- x = (171 * x) % 30269
- y = (172 * y) % 30307
- z = (170 * z) % 30323
- #
- self._seed = x, y, z
- # END CRITICAL SECTION
- #
- return (x/30269.0 + y/30307.0 + z/30323.0) % 1.0
-
- def uniform(self, a, b):
- """Get a random number in the range [a, b)."""
- return a + (b-a) * self.random()
-
- def randint(self, a, b):
- """Get a random integer in the range [a, b] including
- both end points.
-
- (Deprecated; use randrange below.)"""
- return self.randrange(a, b+1)
-
- def choice(self, seq):
- """Choose a random element from a non-empty sequence."""
- return seq[int(self.random() * len(seq))]
-
- def randrange(self, start, stop=None, step=1, int=int, default=None):
- """Choose a random item from range(start, stop[, step]).
-
- This fixes the problem with randint() which includes the
- endpoint; in Python this is usually not what you want.
- Do not supply the 'int' and 'default' arguments."""
- # This code is a bit messy to make it fast for the
- # common case while still doing adequate error checking
- istart = int(start)
- if istart != start:
- raise ValueError, "non-integer arg 1 for randrange()"
- if stop is default:
- if istart > 0:
- return int(self.random() * istart)
- raise ValueError, "empty range for randrange()"
- istop = int(stop)
- if istop != stop:
- raise ValueError, "non-integer stop for randrange()"
- if step == 1:
- if istart < istop:
- return istart + int(self.random() *
- (istop - istart))
- raise ValueError, "empty range for randrange()"
- istep = int(step)
- if istep != step:
- raise ValueError, "non-integer step for randrange()"
- if istep > 0:
- n = (istop - istart + istep - 1) / istep
- elif istep < 0:
- n = (istop - istart + istep + 1) / istep
- else:
- raise ValueError, "zero step for randrange()"
-
- if n <= 0:
- raise ValueError, "empty range for randrange()"
- return istart + istep*int(self.random() * n)
-
-
-# Initialize from the current time
-_inst = whrandom()
-seed = _inst.seed
-random = _inst.random
-uniform = _inst.uniform
-randint = _inst.randint
-choice = _inst.choice
-randrange = _inst.randrange
+++ /dev/null
-# module 'zmod'
-
-# Compute properties of mathematical "fields" formed by taking
-# Z/n (the whole numbers modulo some whole number n) and an
-# irreducible polynomial (i.e., a polynomial with only complex zeros),
-# e.g., Z/5 and X**2 + 2.
-#
-# The field is formed by taking all possible linear combinations of
-# a set of d base vectors (where d is the degree of the polynomial).
-#
-# Note that this procedure doesn't yield a field for all combinations
-# of n and p: it may well be that some numbers have more than one
-# inverse and others have none. This is what we check.
-#
-# Remember that a field is a ring where each element has an inverse.
-# A ring has commutative addition and multiplication, a zero and a one:
-# 0*x = x*0 = 0, 0+x = x+0 = x, 1*x = x*1 = x. Also, the distributive
-# property holds: a*(b+c) = a*b + b*c.
-# (XXX I forget if this is an axiom or follows from the rules.)
-
-import poly
-
-
-# Example N and polynomial
-
-N = 5
-P = poly.plus(poly.one(0, 2), poly.one(2, 1)) # 2 + x**2
-
-
-# Return x modulo y. Returns >= 0 even if x < 0.
-
-def mod(x, y):
- return divmod(x, y)[1]
-
-
-# Normalize a polynomial modulo n and modulo p.
-
-def norm(a, n, p):
- a = poly.modulo(a, p)
- a = a[:]
- for i in range(len(a)): a[i] = mod(a[i], n)
- a = poly.normalize(a)
- return a
-
-
-# Make a list of all n^d elements of the proposed field.
-
-def make_all(mat):
- all = []
- for row in mat:
- for a in row:
- all.append(a)
- return all
-
-def make_elements(n, d):
- if d == 0: return [poly.one(0, 0)]
- sub = make_elements(n, d-1)
- all = []
- for a in sub:
- for i in range(n):
- all.append(poly.plus(a, poly.one(d-1, i)))
- return all
-
-def make_inv(all, n, p):
- x = poly.one(1, 1)
- inv = []
- for a in all:
- inv.append(norm(poly.times(a, x), n, p))
- return inv
-
-def checkfield(n, p):
- all = make_elements(n, len(p)-1)
- inv = make_inv(all, n, p)
- all1 = all[:]
- inv1 = inv[:]
- all1.sort()
- inv1.sort()
- if all1 == inv1: print 'BINGO!'
- else:
- print 'Sorry:', n, p
- print all
- print inv
-
-def rj(s, width):
- if type(s) is not type(''): s = `s`
- n = len(s)
- if n >= width: return s
- return ' '*(width - n) + s
-
-def lj(s, width):
- if type(s) is not type(''): s = `s`
- n = len(s)
- if n >= width: return s
- return s + ' '*(width - n)
+++ /dev/null
-#! /usr/bin/env python
-
-r"""Convert old ("regex") regular expressions to new syntax ("re").
-
-When imported as a module, there are two functions, with their own
-strings:
-
- convert(s, syntax=None) -- convert a regex regular expression to re syntax
-
- quote(s) -- return a quoted string literal
-
-When used as a script, read a Python string literal (or any other
-expression evaluating to a string) from stdin, and write the
-translated expression to stdout as a string literal. Unless stdout is
-a tty, no trailing \n is written to stdout. This is done so that it
-can be used with Emacs C-U M-| (shell-command-on-region with argument
-which filters the region through the shell command).
-
-No attempt has been made at coding for performance.
-
-Translation table...
-
- \( ( (unless RE_NO_BK_PARENS set)
- \) ) (unless RE_NO_BK_PARENS set)
- \| | (unless RE_NO_BK_VBAR set)
- \< \b (not quite the same, but alla...)
- \> \b (not quite the same, but alla...)
- \` \A
- \' \Z
-
-Not translated...
-
- .
- ^
- $
- *
- + (unless RE_BK_PLUS_QM set, then to \+)
- ? (unless RE_BK_PLUS_QM set, then to \?)
- \
- \b
- \B
- \w
- \W
- \1 ... \9
-
-Special cases...
-
- Non-printable characters are always replaced by their 3-digit
- escape code (except \t, \n, \r, which use mnemonic escapes)
-
- Newline is turned into | when RE_NEWLINE_OR is set
-
-XXX To be done...
-
- [...] (different treatment of backslashed items?)
- [^...] (different treatment of backslashed items?)
- ^ $ * + ? (in some error contexts these are probably treated differently)
- \vDD \DD (in the regex docs but only works when RE_ANSI_HEX set)
-
-"""
-
-
-import warnings
-warnings.filterwarnings("ignore", ".* regex .*", DeprecationWarning, __name__,
- append=1)
-
-import regex
-from regex_syntax import * # RE_*
-
-__all__ = ["convert","quote"]
-
-# Default translation table
-mastertable = {
- r'\<': r'\b',
- r'\>': r'\b',
- r'\`': r'\A',
- r'\'': r'\Z',
- r'\(': '(',
- r'\)': ')',
- r'\|': '|',
- '(': r'\(',
- ')': r'\)',
- '|': r'\|',
- '\t': r'\t',
- '\n': r'\n',
- '\r': r'\r',
-}
-
-
-def convert(s, syntax=None):
- """Convert a regex regular expression to re syntax.
-
- The first argument is the regular expression, as a string object,
- just like it would be passed to regex.compile(). (I.e., pass the
- actual string object -- string quotes must already have been
- removed and the standard escape processing has already been done,
- e.g. by eval().)
-
- The optional second argument is the regex syntax variant to be
- used. This is an integer mask as passed to regex.set_syntax();
- the flag bits are defined in regex_syntax. When not specified, or
- when None is given, the current regex syntax mask (as retrieved by
- regex.get_syntax()) is used -- which is 0 by default.
-
- The return value is a regular expression, as a string object that
- could be passed to re.compile(). (I.e., no string quotes have
- been added -- use quote() below, or repr().)
-
- The conversion is not always guaranteed to be correct. More
- syntactical analysis should be performed to detect borderline
- cases and decide what to do with them. For example, 'x*?' is not
- translated correctly.
-
- """
- table = mastertable.copy()
- if syntax is None:
- syntax = regex.get_syntax()
- if syntax & RE_NO_BK_PARENS:
- del table[r'\('], table[r'\)']
- del table['('], table[')']
- if syntax & RE_NO_BK_VBAR:
- del table[r'\|']
- del table['|']
- if syntax & RE_BK_PLUS_QM:
- table['+'] = r'\+'
- table['?'] = r'\?'
- table[r'\+'] = '+'
- table[r'\?'] = '?'
- if syntax & RE_NEWLINE_OR:
- table['\n'] = '|'
- res = ""
-
- i = 0
- end = len(s)
- while i < end:
- c = s[i]
- i = i+1
- if c == '\\':
- c = s[i]
- i = i+1
- key = '\\' + c
- key = table.get(key, key)
- res = res + key
- else:
- c = table.get(c, c)
- res = res + c
- return res
-
-
-def quote(s, quote=None):
- """Convert a string object to a quoted string literal.
-
- This is similar to repr() but will return a "raw" string (r'...'
- or r"...") when the string contains backslashes, instead of
- doubling all backslashes. The resulting string does *not* always
- evaluate to the same string as the original; however it will do
- just the right thing when passed into re.compile().
-
- The optional second argument forces the string quote; it must be
- a single character which is a valid Python string quote.
-
- """
- if quote is None:
- q = "'"
- altq = "'"
- if q in s and altq not in s:
- q = altq
- else:
- assert quote in ('"', "'", '"""', "'''")
- q = quote
- res = q
- for c in s:
- if c == q: c = '\\' + c
- elif c < ' ' or c > '~': c = "\\%03o" % ord(c)
- res = res + c
- res = res + q
- if '\\' in res:
- res = 'r' + res
- return res
-
-
-def main():
- """Main program -- called when run as a script."""
- import sys
- s = eval(sys.stdin.read())
- sys.stdout.write(quote(convert(s)))
- if sys.stdout.isatty():
- sys.stdout.write("\n")
-
-
-if __name__ == '__main__':
- main()
+++ /dev/null
-"""Constants for selecting regexp syntaxes for the obsolete regex module.
-
-This module is only for backward compatibility. "regex" has now
-been replaced by the new regular expression module, "re".
-
-These bits are passed to regex.set_syntax() to choose among
-alternative regexp syntaxes.
-"""
-
-# 1 means plain parentheses serve as grouping, and backslash
-# parentheses are needed for literal searching.
-# 0 means backslash-parentheses are grouping, and plain parentheses
-# are for literal searching.
-RE_NO_BK_PARENS = 1
-
-# 1 means plain | serves as the "or"-operator, and \| is a literal.
-# 0 means \| serves as the "or"-operator, and | is a literal.
-RE_NO_BK_VBAR = 2
-
-# 0 means plain + or ? serves as an operator, and \+, \? are literals.
-# 1 means \+, \? are operators and plain +, ? are literals.
-RE_BK_PLUS_QM = 4
-
-# 1 means | binds tighter than ^ or $.
-# 0 means the contrary.
-RE_TIGHT_VBAR = 8
-
-# 1 means treat \n as an _OR operator
-# 0 means treat it as a normal character
-RE_NEWLINE_OR = 16
-
-# 0 means that a special characters (such as *, ^, and $) always have
-# their special meaning regardless of the surrounding context.
-# 1 means that special characters may act as normal characters in some
-# contexts. Specifically, this applies to:
-# ^ - only special at the beginning, or after ( or |
-# $ - only special at the end, or before ) or |
-# *, +, ? - only special when not after the beginning, (, or |
-RE_CONTEXT_INDEP_OPS = 32
-
-# ANSI sequences (\n etc) and \xhh
-RE_ANSI_HEX = 64
-
-# No GNU extensions
-RE_NO_GNU_EXTENSIONS = 128
-
-# Now define combinations of bits for the standard possibilities.
-RE_SYNTAX_AWK = (RE_NO_BK_PARENS | RE_NO_BK_VBAR | RE_CONTEXT_INDEP_OPS)
-RE_SYNTAX_EGREP = (RE_SYNTAX_AWK | RE_NEWLINE_OR)
-RE_SYNTAX_GREP = (RE_BK_PLUS_QM | RE_NEWLINE_OR)
-RE_SYNTAX_EMACS = 0
-
-# (Python's obsolete "regexp" module used a syntax similar to awk.)
+++ /dev/null
-"""Regexp-based split and replace using the obsolete regex module.
-
-This module is only for backward compatibility. These operations
-are now provided by the new regular expression module, "re".
-
-sub(pat, repl, str): replace first occurrence of pattern in string
-gsub(pat, repl, str): replace all occurrences of pattern in string
-split(str, pat, maxsplit): split string using pattern as delimiter
-splitx(str, pat, maxsplit): split string using pattern as delimiter plus
- return delimiters
-"""
-
-import warnings
-warnings.warn("the regsub module is deprecated; please use re.sub()",
- DeprecationWarning)
-
-# Ignore further deprecation warnings about this module
-warnings.filterwarnings("ignore", "", DeprecationWarning, __name__)
-
-import regex
-
-__all__ = ["sub","gsub","split","splitx","capwords"]
-
-# Replace first occurrence of pattern pat in string str by replacement
-# repl. If the pattern isn't found, the string is returned unchanged.
-# The replacement may contain references \digit to subpatterns and
-# escaped backslashes. The pattern may be a string or an already
-# compiled pattern.
-
-def sub(pat, repl, str):
- prog = compile(pat)
- if prog.search(str) >= 0:
- regs = prog.regs
- a, b = regs[0]
- str = str[:a] + expand(repl, regs, str) + str[b:]
- return str
-
-
-# Replace all (non-overlapping) occurrences of pattern pat in string
-# str by replacement repl. The same rules as for sub() apply.
-# Empty matches for the pattern are replaced only when not adjacent to
-# a previous match, so e.g. gsub('', '-', 'abc') returns '-a-b-c-'.
-
-def gsub(pat, repl, str):
- prog = compile(pat)
- new = ''
- start = 0
- first = 1
- while prog.search(str, start) >= 0:
- regs = prog.regs
- a, b = regs[0]
- if a == b == start and not first:
- if start >= len(str) or prog.search(str, start+1) < 0:
- break
- regs = prog.regs
- a, b = regs[0]
- new = new + str[start:a] + expand(repl, regs, str)
- start = b
- first = 0
- new = new + str[start:]
- return new
-
-
-# Split string str in fields separated by delimiters matching pattern
-# pat. Only non-empty matches for the pattern are considered, so e.g.
-# split('abc', '') returns ['abc'].
-# The optional 3rd argument sets the number of splits that are performed.
-
-def split(str, pat, maxsplit = 0):
- return intsplit(str, pat, maxsplit, 0)
-
-# Split string str in fields separated by delimiters matching pattern
-# pat. Only non-empty matches for the pattern are considered, so e.g.
-# split('abc', '') returns ['abc']. The delimiters are also included
-# in the list.
-# The optional 3rd argument sets the number of splits that are performed.
-
-
-def splitx(str, pat, maxsplit = 0):
- return intsplit(str, pat, maxsplit, 1)
-
-# Internal function used to implement split() and splitx().
-
-def intsplit(str, pat, maxsplit, retain):
- prog = compile(pat)
- res = []
- start = next = 0
- splitcount = 0
- while prog.search(str, next) >= 0:
- regs = prog.regs
- a, b = regs[0]
- if a == b:
- next = next + 1
- if next >= len(str):
- break
- else:
- res.append(str[start:a])
- if retain:
- res.append(str[a:b])
- start = next = b
- splitcount = splitcount + 1
- if (maxsplit and (splitcount >= maxsplit)):
- break
- res.append(str[start:])
- return res
-
-
-# Capitalize words split using a pattern
-
-def capwords(str, pat='[^a-zA-Z0-9_]+'):
- words = splitx(str, pat)
- for i in range(0, len(words), 2):
- words[i] = words[i].capitalize()
- return "".join(words)
-
-
-# Internal subroutines:
-# compile(pat): compile a pattern, caching already compiled patterns
-# expand(repl, regs, str): expand \digit escapes in replacement string
-
-
-# Manage a cache of compiled regular expressions.
-#
-# If the pattern is a string a compiled version of it is returned. If
-# the pattern has been used before we return an already compiled
-# version from the cache; otherwise we compile it now and save the
-# compiled version in the cache, along with the syntax it was compiled
-# with. Instead of a string, a compiled regular expression can also
-# be passed.
-
-cache = {}
-
-def compile(pat):
- if type(pat) != type(''):
- return pat # Assume it is a compiled regex
- key = (pat, regex.get_syntax())
- if key in cache:
- prog = cache[key] # Get it from the cache
- else:
- prog = cache[key] = regex.compile(pat)
- return prog
-
-
-def clear_cache():
- global cache
- cache = {}
-
-
-# Expand \digit in the replacement.
-# Each occurrence of \digit is replaced by the substring of str
-# indicated by regs[digit]. To include a literal \ in the
-# replacement, double it; other \ escapes are left unchanged (i.e.
-# the \ and the following character are both copied).
-
-def expand(repl, regs, str):
- if '\\' not in repl:
- return repl
- new = ''
- i = 0
- ord0 = ord('0')
- while i < len(repl):
- c = repl[i]; i = i+1
- if c != '\\' or i >= len(repl):
- new = new + c
- else:
- c = repl[i]; i = i+1
- if '0' <= c <= '9':
- a, b = regs[ord(c)-ord0]
- new = new + str[a:b]
- elif c == '\\':
- new = new + c
- else:
- new = new + '\\' + c
- return new
-
-
-# Test program, reads sequences "pat repl str" from stdin.
-# Optional argument specifies pattern used to split lines.
-
-def test():
- import sys
- if sys.argv[1:]:
- delpat = sys.argv[1]
- else:
- delpat = '[ \t\n]+'
- while 1:
- if sys.stdin.isatty(): sys.stderr.write('--> ')
- line = sys.stdin.readline()
- if not line: break
- if line[-1] == '\n': line = line[:-1]
- fields = split(line, delpat)
- if len(fields) != 3:
- print 'Sorry, not three fields'
- print 'split:', repr(fields)
- continue
- [pat, repl, str] = split(line, delpat)
- print 'sub :', repr(sub(pat, repl, str))
- print 'gsub:', repr(gsub(pat, repl, str))
ok_builtin_modules = ('audioop', 'array', 'binascii',
'cmath', 'errno', 'imageop',
'marshal', 'math', 'md5', 'operator',
- 'parser', 'regex', 'select',
+ 'parser', 'select',
'sha', '_sre', 'strop', 'struct', 'time',
'_weakref')
self.check_all("quopri")
self.check_all("random")
self.check_all("re")
- self.check_all("reconvert")
- self.check_all("regsub")
self.check_all("repr")
self.check_all("rexec")
self.check_all("rfc822")
+++ /dev/null
-from test.test_support import verbose, sortdict
-import warnings
-warnings.filterwarnings("ignore", "the regex module is deprecated",
- DeprecationWarning, __name__)
-import regex
-from regex_syntax import *
-
-re = 'a+b+c+'
-print 'no match:', regex.match(re, 'hello aaaabcccc world')
-print 'successful search:', regex.search(re, 'hello aaaabcccc world')
-try:
- cre = regex.compile('\(' + re)
-except regex.error:
- print 'caught expected exception'
-else:
- print 'expected regex.error not raised'
-
-print 'failed awk syntax:', regex.search('(a+)|(b+)', 'cdb')
-prev = regex.set_syntax(RE_SYNTAX_AWK)
-print 'successful awk syntax:', regex.search('(a+)|(b+)', 'cdb')
-regex.set_syntax(prev)
-print 'failed awk syntax:', regex.search('(a+)|(b+)', 'cdb')
-
-re = '\(<one>[0-9]+\) *\(<two>[0-9]+\)'
-print 'matching with group names and compile()'
-cre = regex.compile(re)
-print cre.match('801 999')
-try:
- print cre.group('one')
-except regex.error:
- print 'caught expected exception'
-else:
- print 'expected regex.error not raised'
-
-print 'matching with group names and symcomp()'
-cre = regex.symcomp(re)
-print cre.match('801 999')
-print cre.group(0)
-print cre.group('one')
-print cre.group(1, 2)
-print cre.group('one', 'two')
-print 'realpat:', cre.realpat
-print 'groupindex:', sortdict(cre.groupindex)
-
-re = 'world'
-cre = regex.compile(re)
-print 'not case folded search:', cre.search('HELLO WORLD')
-cre = regex.compile(re, regex.casefold)
-print 'case folded search:', cre.search('HELLO WORLD')
-
-print '__members__:', cre.__members__
-print 'regs:', cre.regs
-print 'last:', cre.last
-print 'translate:', len(cre.translate)
-print 'givenpat:', cre.givenpat
-
-print 'match with pos:', cre.match('hello world', 7)
-print 'search with pos:', cre.search('hello world there world', 7)
-print 'bogus group:', cre.group(0, 1, 3)
-try:
- print 'no name:', cre.group('one')
-except regex.error:
- print 'caught expected exception'
-else:
- print 'expected regex.error not raised'
-
-from regex_tests import *
-if verbose: print 'Running regex_tests test suite'
-
-for t in tests:
- pattern=s=outcome=repl=expected=None
- if len(t)==5:
- pattern, s, outcome, repl, expected = t
- elif len(t)==3:
- pattern, s, outcome = t
- else:
- raise ValueError, ('Test tuples should have 3 or 5 fields',t)
-
- try:
- obj=regex.compile(pattern)
- except regex.error:
- if outcome==SYNTAX_ERROR: pass # Expected a syntax error
- else:
- # Regex syntax errors aren't yet reported, so for
- # the official test suite they'll be quietly ignored.
- pass
- #print '=== Syntax error:', t
- else:
- try:
- result=obj.search(s)
- except regex.error, msg:
- print '=== Unexpected exception', t, repr(msg)
- if outcome==SYNTAX_ERROR:
- # This should have been a syntax error; forget it.
- pass
- elif outcome==FAIL:
- if result==-1: pass # No match, as expected
- else: print '=== Succeeded incorrectly', t
- elif outcome==SUCCEED:
- if result!=-1:
- # Matched, as expected, so now we compute the
- # result string and compare it to our expected result.
- start, end = obj.regs[0]
- found=s[start:end]
- groups=obj.group(1,2,3,4,5,6,7,8,9,10)
- vardict=vars()
- for i in range(len(groups)):
- vardict['g'+str(i+1)]=str(groups[i])
- repl=eval(repl)
- if repl!=expected:
- print '=== grouping error', t, repr(repl)+' should be '+repr(expected)
- else:
- print '=== Failed incorrectly', t
import profile
import pstats
import py_compile
-#import reconvert
import repr
try:
import rlcompleter # not available on Windows
#
# Some modules that are normally always on:
- exts.append( Extension('regex', ['regexmodule.c', 'regexpr.c']) )
-
exts.append( Extension('_weakref', ['_weakref.c']) )
exts.append( Extension('_symtable', ['symtablemodule.c']) )
Extension Modules
-----------------
-- Swapped re and sre, so help(re) provides full help. importing sre
+- Everything under lib-old was removed. This includes the following modules:
+ Para, addpack, cmp, cmpcache, codehack, dircmp, dump, find, fmt, grep,
+ lockfile, newdir, ni, packmail, poly, rand, statcache, tb, tzparse,
+ util, whatsound, whrandom, zmod
+
+- The following modules were removed: regsub, reconvert, regex, regex_syntax.
+
+- re and sre were swapped, so help(re) provides full help. Importing sre
is deprecated. The undocumented re.engine variable no longer exists.
- Bug #1448490: Fixed a bug that ISO-2022 codecs could not handle
rand Don't use unless you want compatibility with C's rand().
random Random variable generators
re Regular Expressions.
-reconvert Convert old ("regex") regular expressions to new syntax
- ("re").
repr Redo repr() but with limits on most sizes.
rexec Restricted execution facilities ("safe" exec, eval, etc).
rfc822 RFC-822 message manipulation class.
array Obj efficiently representing arrays of basic values
math Math functions of C standard
time Time-related functions (also the newer datetime module)
- regex Regular expression matching operations
marshal Read and write some python values in binary format
struct Convert between python values and C structs
+++ /dev/null
-/*
-XXX support range parameter on search
-XXX support mstop parameter on search
-*/
-
-
-/* Regular expression objects */
-/* This uses Tatu Ylonen's copyleft-free reimplementation of
- GNU regular expressions */
-
-#include "Python.h"
-
-#include <ctype.h>
-
-#include "regexpr.h"
-
-static PyObject *RegexError; /* Exception */
-
-typedef struct {
- PyObject_HEAD
- struct re_pattern_buffer re_patbuf; /* The compiled expression */
- struct re_registers re_regs; /* The registers from the last match */
- char re_fastmap[256]; /* Storage for fastmap */
- PyObject *re_translate; /* String object for translate table */
- PyObject *re_lastok; /* String object last matched/searched */
- PyObject *re_groupindex; /* Group name to index dictionary */
- PyObject *re_givenpat; /* Pattern with symbolic groups */
- PyObject *re_realpat; /* Pattern without symbolic groups */
-} regexobject;
-
-/* Regex object methods */
-
-static void
-reg_dealloc(regexobject *re)
-{
- if (re->re_patbuf.buffer)
- free(re->re_patbuf.buffer);
- Py_XDECREF(re->re_translate);
- Py_XDECREF(re->re_lastok);
- Py_XDECREF(re->re_groupindex);
- Py_XDECREF(re->re_givenpat);
- Py_XDECREF(re->re_realpat);
- PyObject_Del(re);
-}
-
-static PyObject *
-makeresult(struct re_registers *regs)
-{
- PyObject *v;
- int i;
- static PyObject *filler = NULL;
-
- if (filler == NULL) {
- filler = Py_BuildValue("(ii)", -1, -1);
- if (filler == NULL)
- return NULL;
- }
- v = PyTuple_New(RE_NREGS);
- if (v == NULL)
- return NULL;
-
- for (i = 0; i < RE_NREGS; i++) {
- int lo = regs->start[i];
- int hi = regs->end[i];
- PyObject *w;
- if (lo == -1 && hi == -1) {
- w = filler;
- Py_INCREF(w);
- }
- else
- w = Py_BuildValue("(ii)", lo, hi);
- if (w == NULL || PyTuple_SetItem(v, i, w) < 0) {
- Py_DECREF(v);
- return NULL;
- }
- }
- return v;
-}
-
-static PyObject *
-regobj_match(regexobject *re, PyObject *args)
-{
- PyObject *argstring;
- char *buffer;
- int size;
- int offset = 0;
- int result;
-
- if (!PyArg_ParseTuple(args, "O|i:match", &argstring, &offset))
- return NULL;
- if (!PyArg_Parse(argstring, "t#", &buffer, &size))
- return NULL;
-
- if (offset < 0 || offset > size) {
- PyErr_SetString(RegexError, "match offset out of range");
- return NULL;
- }
- Py_XDECREF(re->re_lastok);
- re->re_lastok = NULL;
- result = _Py_re_match(&re->re_patbuf, (unsigned char *)buffer, size, offset,
- &re->re_regs);
- if (result < -1) {
- /* Serious failure of some sort; if re_match didn't
- set an exception, raise a generic error */
- if (!PyErr_Occurred())
- PyErr_SetString(RegexError, "match failure");
- return NULL;
- }
- if (result >= 0) {
- Py_INCREF(argstring);
- re->re_lastok = argstring;
- }
- return PyInt_FromLong((long)result); /* Length of the match or -1 */
-}
-
-static PyObject *
-regobj_search(regexobject *re, PyObject *args)
-{
- PyObject *argstring;
- char *buffer;
- int size;
- int offset = 0;
- int range;
- int result;
-
- if (!PyArg_ParseTuple(args, "O|i:search", &argstring, &offset))
- return NULL;
- if (!PyArg_Parse(argstring, "t#:search", &buffer, &size))
- return NULL;
-
- if (offset < 0 || offset > size) {
- PyErr_SetString(RegexError, "search offset out of range");
- return NULL;
- }
- /* NB: In Emacs 18.57, the documentation for re_search[_2] and
- the implementation don't match: the documentation states that
- |range| positions are tried, while the code tries |range|+1
- positions. It seems more productive to believe the code! */
- range = size - offset;
- Py_XDECREF(re->re_lastok);
- re->re_lastok = NULL;
- result = _Py_re_search(&re->re_patbuf, (unsigned char *)buffer, size, offset, range,
- &re->re_regs);
- if (result < -1) {
- /* Serious failure of some sort; if re_match didn't
- set an exception, raise a generic error */
- if (!PyErr_Occurred())
- PyErr_SetString(RegexError, "match failure");
- return NULL;
- }
- if (result >= 0) {
- Py_INCREF(argstring);
- re->re_lastok = argstring;
- }
- return PyInt_FromLong((long)result); /* Position of the match or -1 */
-}
-
-/* get the group from the regex where index can be a string (group name) or
- an integer index [0 .. 99]
- */
-static PyObject*
-group_from_index(regexobject *re, PyObject *index)
-{
- int i, a, b;
- char *v;
-
- if (PyString_Check(index))
- if (re->re_groupindex == NULL ||
- !(index = PyDict_GetItem(re->re_groupindex, index)))
- {
- PyErr_SetString(RegexError,
- "group() group name doesn't exist");
- return NULL;
- }
-
- i = PyInt_AsLong(index);
- if (i == -1 && PyErr_Occurred())
- return NULL;
-
- if (i < 0 || i >= RE_NREGS) {
- PyErr_SetString(RegexError, "group() index out of range");
- return NULL;
- }
- if (re->re_lastok == NULL) {
- PyErr_SetString(RegexError,
- "group() only valid after successful match/search");
- return NULL;
- }
- a = re->re_regs.start[i];
- b = re->re_regs.end[i];
- if (a < 0 || b < 0) {
- Py_INCREF(Py_None);
- return Py_None;
- }
-
- if (!(v = PyString_AsString(re->re_lastok)))
- return NULL;
-
- return PyString_FromStringAndSize(v+a, b-a);
-}
-
-
-static PyObject *
-regobj_group(regexobject *re, PyObject *args)
-{
- int n = PyTuple_Size(args);
- int i;
- PyObject *res = NULL;
-
- if (n < 0)
- return NULL;
- if (n == 0) {
- PyErr_SetString(PyExc_TypeError, "not enough arguments");
- return NULL;
- }
- if (n == 1) {
- /* return value is a single string */
- PyObject *index = PyTuple_GetItem(args, 0);
- if (!index)
- return NULL;
-
- return group_from_index(re, index);
- }
-
- /* return value is a tuple */
- if (!(res = PyTuple_New(n)))
- return NULL;
-
- for (i = 0; i < n; i++) {
- PyObject *index = PyTuple_GetItem(args, i);
- PyObject *group = NULL;
-
- if (!index)
- goto finally;
- if (!(group = group_from_index(re, index)))
- goto finally;
- if (PyTuple_SetItem(res, i, group) < 0)
- goto finally;
- }
- return res;
-
- finally:
- Py_DECREF(res);
- return NULL;
-}
-
-
-static struct PyMethodDef reg_methods[] = {
- {"match", (PyCFunction)regobj_match, METH_VARARGS},
- {"search", (PyCFunction)regobj_search, METH_VARARGS},
- {"group", (PyCFunction)regobj_group, METH_VARARGS},
- {NULL, NULL} /* sentinel */
-};
-
-
-\f
-static char* members[] = {
- "last", "regs", "translate",
- "groupindex", "realpat", "givenpat",
- NULL
-};
-
-
-static PyObject *
-regobj_getattr(regexobject *re, char *name)
-{
- if (strcmp(name, "regs") == 0) {
- if (re->re_lastok == NULL) {
- Py_INCREF(Py_None);
- return Py_None;
- }
- return makeresult(&re->re_regs);
- }
- if (strcmp(name, "last") == 0) {
- if (re->re_lastok == NULL) {
- Py_INCREF(Py_None);
- return Py_None;
- }
- Py_INCREF(re->re_lastok);
- return re->re_lastok;
- }
- if (strcmp(name, "translate") == 0) {
- if (re->re_translate == NULL) {
- Py_INCREF(Py_None);
- return Py_None;
- }
- Py_INCREF(re->re_translate);
- return re->re_translate;
- }
- if (strcmp(name, "groupindex") == 0) {
- if (re->re_groupindex == NULL) {
- Py_INCREF(Py_None);
- return Py_None;
- }
- Py_INCREF(re->re_groupindex);
- return re->re_groupindex;
- }
- if (strcmp(name, "realpat") == 0) {
- if (re->re_realpat == NULL) {
- Py_INCREF(Py_None);
- return Py_None;
- }
- Py_INCREF(re->re_realpat);
- return re->re_realpat;
- }
- if (strcmp(name, "givenpat") == 0) {
- if (re->re_givenpat == NULL) {
- Py_INCREF(Py_None);
- return Py_None;
- }
- Py_INCREF(re->re_givenpat);
- return re->re_givenpat;
- }
- if (strcmp(name, "__members__") == 0) {
- int i = 0;
- PyObject *list = NULL;
-
- /* okay, so it's unlikely this list will change that often.
- still, it's easier to change it in just one place.
- */
- while (members[i])
- i++;
- if (!(list = PyList_New(i)))
- return NULL;
-
- i = 0;
- while (members[i]) {
- PyObject* v = PyString_FromString(members[i]);
- if (!v || PyList_SetItem(list, i, v) < 0) {
- Py_DECREF(list);
- return NULL;
- }
- i++;
- }
- return list;
- }
- return Py_FindMethod(reg_methods, (PyObject *)re, name);
-}
-
-static PyTypeObject Regextype = {
- PyObject_HEAD_INIT(NULL)
- 0, /*ob_size*/
- "regex.regex", /*tp_name*/
- sizeof(regexobject), /*tp_size*/
- 0, /*tp_itemsize*/
- /* methods */
- (destructor)reg_dealloc, /*tp_dealloc*/
- 0, /*tp_print*/
- (getattrfunc)regobj_getattr, /*tp_getattr*/
- 0, /*tp_setattr*/
- 0, /*tp_compare*/
- 0, /*tp_repr*/
-};
-
-/* reference counting invariants:
- pattern: borrowed
- translate: borrowed
- givenpat: borrowed
- groupindex: transferred
-*/
-static PyObject *
-newregexobject(PyObject *pattern, PyObject *translate, PyObject *givenpat, PyObject *groupindex)
-{
- regexobject *re;
- char *pat;
- int size;
-
- if (!PyArg_Parse(pattern, "t#", &pat, &size))
- return NULL;
-
- if (translate != NULL && PyString_Size(translate) != 256) {
- PyErr_SetString(RegexError,
- "translation table must be 256 bytes");
- return NULL;
- }
- re = PyObject_New(regexobject, &Regextype);
- if (re != NULL) {
- char *error;
- re->re_patbuf.buffer = NULL;
- re->re_patbuf.allocated = 0;
- re->re_patbuf.fastmap = (unsigned char *)re->re_fastmap;
- if (translate) {
- re->re_patbuf.translate = (unsigned char *)PyString_AsString(translate);
- if (!re->re_patbuf.translate)
- goto finally;
- Py_INCREF(translate);
- }
- else
- re->re_patbuf.translate = NULL;
- re->re_translate = translate;
- re->re_lastok = NULL;
- re->re_groupindex = groupindex;
- Py_INCREF(pattern);
- re->re_realpat = pattern;
- Py_INCREF(givenpat);
- re->re_givenpat = givenpat;
- error = _Py_re_compile_pattern((unsigned char *)pat, size, &re->re_patbuf);
- if (error != NULL) {
- PyErr_SetString(RegexError, error);
- goto finally;
- }
- }
- return (PyObject *)re;
- finally:
- Py_DECREF(re);
- return NULL;
-}
-
-static PyObject *
-regex_compile(PyObject *self, PyObject *args)
-{
- PyObject *pat = NULL;
- PyObject *tran = NULL;
-
- if (!PyArg_ParseTuple(args, "S|S:compile", &pat, &tran))
- return NULL;
- return newregexobject(pat, tran, pat, NULL);
-}
-
-static PyObject *
-symcomp(PyObject *pattern, PyObject *gdict)
-{
- char *opat, *oend, *o, *n, *g, *v;
- int group_count = 0;
- int sz;
- int escaped = 0;
- char name_buf[128];
- PyObject *npattern;
- int require_escape = re_syntax & RE_NO_BK_PARENS ? 0 : 1;
-
- if (!(opat = PyString_AsString(pattern)))
- return NULL;
-
- if ((sz = PyString_Size(pattern)) < 0)
- return NULL;
-
- oend = opat + sz;
- o = opat;
-
- if (oend == opat) {
- Py_INCREF(pattern);
- return pattern;
- }
-
- if (!(npattern = PyString_FromStringAndSize((char*)NULL, sz)) ||
- !(n = PyString_AsString(npattern)))
- return NULL;
-
- while (o < oend) {
- if (*o == '(' && escaped == require_escape) {
- char *backtrack;
- escaped = 0;
- ++group_count;
- *n++ = *o;
- if (++o >= oend || *o != '<')
- continue;
- /* *o == '<' */
- if (o+1 < oend && *(o+1) == '>')
- continue;
- backtrack = o;
- g = name_buf;
- for (++o; o < oend;) {
- if (*o == '>') {
- PyObject *group_name = NULL;
- PyObject *group_index = NULL;
- *g++ = '\0';
- group_name = PyString_FromString(name_buf);
- group_index = PyInt_FromLong(group_count);
- if (group_name == NULL ||
- group_index == NULL ||
- PyDict_SetItem(gdict, group_name,
- group_index) != 0)
- {
- Py_XDECREF(group_name);
- Py_XDECREF(group_index);
- Py_XDECREF(npattern);
- return NULL;
- }
- Py_DECREF(group_name);
- Py_DECREF(group_index);
- ++o; /* eat the '>' */
- break;
- }
- if (!isalnum(Py_CHARMASK(*o)) && *o != '_') {
- o = backtrack;
- break;
- }
- *g++ = *o++;
- }
- }
- else if (*o == '[' && !escaped) {
- *n++ = *o;
- ++o; /* eat the char following '[' */
- *n++ = *o;
- while (o < oend && *o != ']') {
- ++o;
- *n++ = *o;
- }
- if (o < oend)
- ++o;
- }
- else if (*o == '\\') {
- escaped = 1;
- *n++ = *o;
- ++o;
- }
- else {
- escaped = 0;
- *n++ = *o;
- ++o;
- }
- }
-
- if (!(v = PyString_AsString(npattern))) {
- Py_DECREF(npattern);
- return NULL;
- }
- /* _PyString_Resize() decrements npattern on failure */
- _PyString_Resize(&npattern, n - v);
- return npattern;
-
-}
-
-static PyObject *
-regex_symcomp(PyObject *self, PyObject *args)
-{
- PyObject *pattern;
- PyObject *tran = NULL;
- PyObject *gdict = NULL;
- PyObject *npattern;
- PyObject *retval = NULL;
-
- if (!PyArg_ParseTuple(args, "S|S:symcomp", &pattern, &tran))
- return NULL;
-
- gdict = PyDict_New();
- if (gdict == NULL || (npattern = symcomp(pattern, gdict)) == NULL) {
- Py_XDECREF(gdict);
- return NULL;
- }
- retval = newregexobject(npattern, tran, pattern, gdict);
- Py_DECREF(npattern);
- return retval;
-}
-
-
-static PyObject *cache_pat;
-static PyObject *cache_prog;
-
-static int
-update_cache(PyObject *pat)
-{
- PyObject *tuple = PyTuple_Pack(1, pat);
- int status = 0;
-
- if (!tuple)
- return -1;
-
- if (pat != cache_pat) {
- Py_XDECREF(cache_pat);
- cache_pat = NULL;
- Py_XDECREF(cache_prog);
- cache_prog = regex_compile((PyObject *)NULL, tuple);
- if (cache_prog == NULL) {
- status = -1;
- goto finally;
- }
- cache_pat = pat;
- Py_INCREF(cache_pat);
- }
- finally:
- Py_DECREF(tuple);
- return status;
-}
-
-static PyObject *
-regex_match(PyObject *self, PyObject *args)
-{
- PyObject *pat, *string;
- PyObject *tuple, *v;
-
- if (!PyArg_ParseTuple(args, "SS:match", &pat, &string))
- return NULL;
- if (update_cache(pat) < 0)
- return NULL;
-
- if (!(tuple = Py_BuildValue("(S)", string)))
- return NULL;
- v = regobj_match((regexobject *)cache_prog, tuple);
- Py_DECREF(tuple);
- return v;
-}
-
-static PyObject *
-regex_search(PyObject *self, PyObject *args)
-{
- PyObject *pat, *string;
- PyObject *tuple, *v;
-
- if (!PyArg_ParseTuple(args, "SS:search", &pat, &string))
- return NULL;
- if (update_cache(pat) < 0)
- return NULL;
-
- if (!(tuple = Py_BuildValue("(S)", string)))
- return NULL;
- v = regobj_search((regexobject *)cache_prog, tuple);
- Py_DECREF(tuple);
- return v;
-}
-
-static PyObject *
-regex_set_syntax(PyObject *self, PyObject *args)
-{
- int syntax;
- if (!PyArg_ParseTuple(args, "i:set_syntax", &syntax))
- return NULL;
- syntax = re_set_syntax(syntax);
- /* wipe the global pattern cache */
- Py_XDECREF(cache_pat);
- cache_pat = NULL;
- Py_XDECREF(cache_prog);
- cache_prog = NULL;
- return PyInt_FromLong((long)syntax);
-}
-
-static PyObject *
-regex_get_syntax(PyObject *self)
-{
- return PyInt_FromLong((long)re_syntax);
-}
-
-
-static struct PyMethodDef regex_global_methods[] = {
- {"compile", regex_compile, METH_VARARGS},
- {"symcomp", regex_symcomp, METH_VARARGS},
- {"match", regex_match, METH_VARARGS},
- {"search", regex_search, METH_VARARGS},
- {"set_syntax", regex_set_syntax, METH_VARARGS},
- {"get_syntax", (PyCFunction)regex_get_syntax, METH_NOARGS},
- {NULL, NULL} /* sentinel */
-};
-
-PyMODINIT_FUNC
-initregex(void)
-{
- PyObject *m, *d, *v;
- int i;
- char *s;
-
- /* Initialize object type */
- Regextype.ob_type = &PyType_Type;
-
- m = Py_InitModule("regex", regex_global_methods);
- if (m == NULL)
- return;
- d = PyModule_GetDict(m);
-
- if (PyErr_Warn(PyExc_DeprecationWarning,
- "the regex module is deprecated; "
- "please use the re module") < 0)
- return;
-
- /* Initialize regex.error exception */
- v = RegexError = PyErr_NewException("regex.error", NULL, NULL);
- if (v == NULL || PyDict_SetItemString(d, "error", v) != 0)
- goto finally;
-
- /* Initialize regex.casefold constant */
- if (!(v = PyString_FromStringAndSize((char *)NULL, 256)))
- goto finally;
-
- if (!(s = PyString_AsString(v)))
- goto finally;
-
- for (i = 0; i < 256; i++) {
- if (isupper(i))
- s[i] = tolower(i);
- else
- s[i] = i;
- }
- if (PyDict_SetItemString(d, "casefold", v) < 0)
- goto finally;
- Py_DECREF(v);
-
- if (!PyErr_Occurred())
- return;
- finally:
- /* Nothing */ ;
-}
+++ /dev/null
-/* regexpr.c
- *
- * Author: Tatu Ylonen <ylo@ngs.fi>
- *
- * Copyright (c) 1991 Tatu Ylonen, Espoo, Finland
- *
- * Permission to use, copy, modify, distribute, and sell this software
- * and its documentation for any purpose is hereby granted without
- * fee, provided that the above copyright notice appear in all copies.
- * This software is provided "as is" without express or implied
- * warranty.
- *
- * Created: Thu Sep 26 17:14:05 1991 ylo
- * Last modified: Mon Nov 4 17:06:48 1991 ylo
- * Ported to Think C: 19 Jan 1992 guido@cwi.nl
- *
- * This code draws many ideas from the regular expression packages by
- * Henry Spencer of the University of Toronto and Richard Stallman of
- * the Free Software Foundation.
- *
- * Emacs-specific code and syntax table code is almost directly borrowed
- * from GNU regexp.
- *
- * Bugs fixed and lots of reorganization by Jeffrey C. Ollie, April
- * 1997 Thanks for bug reports and ideas from Andrew Kuchling, Tim
- * Peters, Guido van Rossum, Ka-Ping Yee, Sjoerd Mullender, and
- * probably one or two others that I'm forgetting.
- *
- * $Id$ */
-
-#include "Python.h"
-#include "regexpr.h"
-
-/* The original code blithely assumed that sizeof(short) == 2. Not
- * always true. Original instances of "(short)x" were replaced by
- * SHORT(x), where SHORT is #defined below. */
-
-#define SHORT(x) ((x) & 0x8000 ? (x) - 0x10000 : (x))
-
-/* The stack implementation is taken from an idea by Andrew Kuchling.
- * It's a doubly linked list of arrays. The advantages of this over a
- * simple linked list are that the number of mallocs required are
- * reduced. It also makes it possible to statically allocate enough
- * space so that small patterns don't ever need to call malloc.
- *
- * The advantages over a single array is that is periodically
- * realloced when more space is needed is that we avoid ever copying
- * the stack. */
-
-/* item_t is the basic stack element. Defined as a union of
- * structures so that both registers, failure points, and counters can
- * be pushed/popped from the stack. There's nothing built into the
- * item to keep track of whether a certain stack item is a register, a
- * failure point, or a counter. */
-
-typedef union item_t
-{
- struct
- {
- int num;
- int level;
- unsigned char *start;
- unsigned char *end;
- } reg;
- struct
- {
- int count;
- int level;
- int phantom;
- unsigned char *code;
- unsigned char *text;
- } fail;
- struct
- {
- int num;
- int level;
- int count;
- } cntr;
-} item_t;
-
-#define STACK_PAGE_SIZE 256
-#define NUM_REGISTERS 256
-
-/* A 'page' of stack items. */
-
-typedef struct item_page_t
-{
- item_t items[STACK_PAGE_SIZE];
- struct item_page_t *prev;
- struct item_page_t *next;
-} item_page_t;
-
-
-typedef struct match_state
-{
- /* The number of registers that have been pushed onto the stack
- * since the last failure point. */
-
- int count;
-
- /* Used to control when registers need to be pushed onto the
- * stack. */
-
- int level;
-
- /* The number of failure points on the stack. */
-
- int point;
-
- /* Storage for the registers. Each register consists of two
- * pointers to characters. So register N is represented as
- * start[N] and end[N]. The pointers must be converted to
- * offsets from the beginning of the string before returning the
- * registers to the calling program. */
-
- unsigned char *start[NUM_REGISTERS];
- unsigned char *end[NUM_REGISTERS];
-
- /* Keeps track of whether a register has changed recently. */
-
- int changed[NUM_REGISTERS];
-
- /* Structure to encapsulate the stack. */
- struct
- {
- /* index into the current page. If index == 0 and you need
- * to pop an item, move to the previous page and set index
- * = STACK_PAGE_SIZE - 1. Otherwise decrement index to
- * push a page. If index == STACK_PAGE_SIZE and you need
- * to push a page move to the next page and set index =
- * 0. If there is no new next page, allocate a new page
- * and link it in. Otherwise, increment index to push a
- * page. */
-
- int index;
- item_page_t *current; /* Pointer to the current page. */
- item_page_t first; /* First page is statically allocated. */
- } stack;
-} match_state;
-
-/* Initialize a state object */
-
-/* #define NEW_STATE(state) \ */
-/* memset(&state, 0, (void *)(&state.stack) - (void *)(&state)); \ */
-/* state.stack.current = &state.stack.first; \ */
-/* state.stack.first.prev = NULL; \ */
-/* state.stack.first.next = NULL; \ */
-/* state.stack.index = 0; \ */
-/* state.level = 1 */
-
-#define NEW_STATE(state, nregs) \
-{ \
- int i; \
- for (i = 0; i < nregs; i++) \
- { \
- state.start[i] = NULL; \
- state.end[i] = NULL; \
- state.changed[i] = 0; \
- } \
- state.stack.current = &state.stack.first; \
- state.stack.first.prev = NULL; \
- state.stack.first.next = NULL; \
- state.stack.index = 0; \
- state.level = 1; \
- state.count = 0; \
- state.level = 0; \
- state.point = 0; \
-}
-
-/* Free any memory that might have been malloc'd */
-
-#define FREE_STATE(state) \
-while(state.stack.first.next != NULL) \
-{ \
- state.stack.current = state.stack.first.next; \
- state.stack.first.next = state.stack.current->next; \
- free(state.stack.current); \
-}
-
-/* Discard the top 'count' stack items. */
-
-#define STACK_DISCARD(stack, count, on_error) \
-stack.index -= count; \
-while (stack.index < 0) \
-{ \
- if (stack.current->prev == NULL) \
- on_error; \
- stack.current = stack.current->prev; \
- stack.index += STACK_PAGE_SIZE; \
-}
-
-/* Store a pointer to the previous item on the stack. Used to pop an
- * item off of the stack. */
-
-#define STACK_PREV(stack, top, on_error) \
-if (stack.index == 0) \
-{ \
- if (stack.current->prev == NULL) \
- on_error; \
- stack.current = stack.current->prev; \
- stack.index = STACK_PAGE_SIZE - 1; \
-} \
-else \
-{ \
- stack.index--; \
-} \
-top = &(stack.current->items[stack.index])
-
-/* Store a pointer to the next item on the stack. Used to push an item
- * on to the stack. */
-
-#define STACK_NEXT(stack, top, on_error) \
-if (stack.index == STACK_PAGE_SIZE) \
-{ \
- if (stack.current->next == NULL) \
- { \
- stack.current->next = (item_page_t *)malloc(sizeof(item_page_t)); \
- if (stack.current->next == NULL) \
- on_error; \
- stack.current->next->prev = stack.current; \
- stack.current->next->next = NULL; \
- } \
- stack.current = stack.current->next; \
- stack.index = 0; \
-} \
-top = &(stack.current->items[stack.index++])
-
-/* Store a pointer to the item that is 'count' items back in the
- * stack. STACK_BACK(stack, top, 1, on_error) is equivalent to
- * STACK_TOP(stack, top, on_error). */
-
-#define STACK_BACK(stack, top, count, on_error) \
-{ \
- int index; \
- item_page_t *current; \
- current = stack.current; \
- index = stack.index - (count); \
- while (index < 0) \
- { \
- if (current->prev == NULL) \
- on_error; \
- current = current->prev; \
- index += STACK_PAGE_SIZE; \
- } \
- top = &(current->items[index]); \
-}
-
-/* Store a pointer to the top item on the stack. Execute the
- * 'on_error' code if there are no items on the stack. */
-
-#define STACK_TOP(stack, top, on_error) \
-if (stack.index == 0) \
-{ \
- if (stack.current->prev == NULL) \
- on_error; \
- top = &(stack.current->prev->items[STACK_PAGE_SIZE - 1]); \
-} \
-else \
-{ \
- top = &(stack.current->items[stack.index - 1]); \
-}
-
-/* Test to see if the stack is empty */
-
-#define STACK_EMPTY(stack) ((stack.index == 0) && \
- (stack.current->prev == NULL))
-
-/* Return the start of register 'reg' */
-
-#define GET_REG_START(state, reg) (state.start[reg])
-
-/* Return the end of register 'reg' */
-
-#define GET_REG_END(state, reg) (state.end[reg])
-
-/* Set the start of register 'reg'. If the state of the register needs
- * saving, push it on the stack. */
-
-#define SET_REG_START(state, reg, text, on_error) \
-if(state.changed[reg] < state.level) \
-{ \
- item_t *item; \
- STACK_NEXT(state.stack, item, on_error); \
- item->reg.num = reg; \
- item->reg.start = state.start[reg]; \
- item->reg.end = state.end[reg]; \
- item->reg.level = state.changed[reg]; \
- state.changed[reg] = state.level; \
- state.count++; \
-} \
-state.start[reg] = text
-
-/* Set the end of register 'reg'. If the state of the register needs
- * saving, push it on the stack. */
-
-#define SET_REG_END(state, reg, text, on_error) \
-if(state.changed[reg] < state.level) \
-{ \
- item_t *item; \
- STACK_NEXT(state.stack, item, on_error); \
- item->reg.num = reg; \
- item->reg.start = state.start[reg]; \
- item->reg.end = state.end[reg]; \
- item->reg.level = state.changed[reg]; \
- state.changed[reg] = state.level; \
- state.count++; \
-} \
-state.end[reg] = text
-
-#define PUSH_FAILURE(state, xcode, xtext, on_error) \
-{ \
- item_t *item; \
- STACK_NEXT(state.stack, item, on_error); \
- item->fail.code = xcode; \
- item->fail.text = xtext; \
- item->fail.count = state.count; \
- item->fail.level = state.level; \
- item->fail.phantom = 0; \
- state.count = 0; \
- state.level++; \
- state.point++; \
-}
-
-/* Update the last failure point with a new position in the text. */
-
-#define UPDATE_FAILURE(state, xtext, on_error) \
-{ \
- item_t *item; \
- STACK_BACK(state.stack, item, state.count + 1, on_error); \
- if (!item->fail.phantom) \
- { \
- item_t *item2; \
- STACK_NEXT(state.stack, item2, on_error); \
- item2->fail.code = item->fail.code; \
- item2->fail.text = xtext; \
- item2->fail.count = state.count; \
- item2->fail.level = state.level; \
- item2->fail.phantom = 1; \
- state.count = 0; \
- state.level++; \
- state.point++; \
- } \
- else \
- { \
- STACK_DISCARD(state.stack, state.count, on_error); \
- STACK_TOP(state.stack, item, on_error); \
- item->fail.text = xtext; \
- state.count = 0; \
- state.level++; \
- } \
-}
-
-#define POP_FAILURE(state, xcode, xtext, on_empty, on_error) \
-{ \
- item_t *item; \
- do \
- { \
- while(state.count > 0) \
- { \
- STACK_PREV(state.stack, item, on_error); \
- state.start[item->reg.num] = item->reg.start; \
- state.end[item->reg.num] = item->reg.end; \
- state.changed[item->reg.num] = item->reg.level; \
- state.count--; \
- } \
- STACK_PREV(state.stack, item, on_empty); \
- xcode = item->fail.code; \
- xtext = item->fail.text; \
- state.count = item->fail.count; \
- state.level = item->fail.level; \
- state.point--; \
- } \
- while (item->fail.text == NULL); \
-}
-
-enum regexp_compiled_ops /* opcodes for compiled regexp */
-{
- Cend, /* end of pattern reached */
- Cbol, /* beginning of line */
- Ceol, /* end of line */
- Cset, /* character set. Followed by 32 bytes of set. */
- Cexact, /* followed by a byte to match */
- Canychar, /* matches any character except newline */
- Cstart_memory, /* set register start addr (followed by reg number) */
- Cend_memory, /* set register end addr (followed by reg number) */
- Cmatch_memory, /* match a duplicate of reg contents (regnum follows)*/
- Cjump, /* followed by two bytes (lsb,msb) of displacement. */
- Cstar_jump, /* will change to jump/update_failure_jump at runtime */
- Cfailure_jump, /* jump to addr on failure */
- Cupdate_failure_jump, /* update topmost failure point and jump */
- Cdummy_failure_jump, /* push a dummy failure point and jump */
- Cbegbuf, /* match at beginning of buffer */
- Cendbuf, /* match at end of buffer */
- Cwordbeg, /* match at beginning of word */
- Cwordend, /* match at end of word */
- Cwordbound, /* match if at word boundary */
- Cnotwordbound, /* match if not at word boundary */
- Csyntaxspec, /* matches syntax code (1 byte follows) */
- Cnotsyntaxspec, /* matches if syntax code does not match (1 byte follows) */
- Crepeat1
-};
-
-enum regexp_syntax_op /* syntax codes for plain and quoted characters */
-{
- Rend, /* special code for end of regexp */
- Rnormal, /* normal character */
- Ranychar, /* any character except newline */
- Rquote, /* the quote character */
- Rbol, /* match beginning of line */
- Reol, /* match end of line */
- Roptional, /* match preceding expression optionally */
- Rstar, /* match preceding expr zero or more times */
- Rplus, /* match preceding expr one or more times */
- Ror, /* match either of alternatives */
- Ropenpar, /* opening parenthesis */
- Rclosepar, /* closing parenthesis */
- Rmemory, /* match memory register */
- Rextended_memory, /* \vnn to match registers 10-99 */
- Ropenset, /* open set. Internal syntax hard-coded below. */
- /* the following are gnu extensions to "normal" regexp syntax */
- Rbegbuf, /* beginning of buffer */
- Rendbuf, /* end of buffer */
- Rwordchar, /* word character */
- Rnotwordchar, /* not word character */
- Rwordbeg, /* beginning of word */
- Rwordend, /* end of word */
- Rwordbound, /* word bound */
- Rnotwordbound, /* not word bound */
- Rnum_ops
-};
-
-static int re_compile_initialized = 0;
-static int regexp_syntax = 0;
-int re_syntax = 0; /* Exported copy of regexp_syntax */
-static unsigned char regexp_plain_ops[256];
-static unsigned char regexp_quoted_ops[256];
-static unsigned char regexp_precedences[Rnum_ops];
-static int regexp_context_indep_ops;
-static int regexp_ansi_sequences;
-
-#define NUM_LEVELS 5 /* number of precedence levels in use */
-#define MAX_NESTING 100 /* max nesting level of operators */
-
-#define SYNTAX(ch) re_syntax_table[(unsigned char)(ch)]
-
-unsigned char re_syntax_table[256];
-
-void re_compile_initialize(void)
-{
- int a;
-
- static int syntax_table_inited = 0;
-
- if (!syntax_table_inited)
- {
- syntax_table_inited = 1;
- memset(re_syntax_table, 0, 256);
- for (a = 'a'; a <= 'z'; a++)
- re_syntax_table[a] = Sword;
- for (a = 'A'; a <= 'Z'; a++)
- re_syntax_table[a] = Sword;
- for (a = '0'; a <= '9'; a++)
- re_syntax_table[a] = Sword | Sdigit | Shexdigit;
- for (a = '0'; a <= '7'; a++)
- re_syntax_table[a] |= Soctaldigit;
- for (a = 'A'; a <= 'F'; a++)
- re_syntax_table[a] |= Shexdigit;
- for (a = 'a'; a <= 'f'; a++)
- re_syntax_table[a] |= Shexdigit;
- re_syntax_table['_'] = Sword;
- for (a = 9; a <= 13; a++)
- re_syntax_table[a] = Swhitespace;
- re_syntax_table[' '] = Swhitespace;
- }
- re_compile_initialized = 1;
- for (a = 0; a < 256; a++)
- {
- regexp_plain_ops[a] = Rnormal;
- regexp_quoted_ops[a] = Rnormal;
- }
- for (a = '0'; a <= '9'; a++)
- regexp_quoted_ops[a] = Rmemory;
- regexp_plain_ops['\134'] = Rquote;
- if (regexp_syntax & RE_NO_BK_PARENS)
- {
- regexp_plain_ops['('] = Ropenpar;
- regexp_plain_ops[')'] = Rclosepar;
- }
- else
- {
- regexp_quoted_ops['('] = Ropenpar;
- regexp_quoted_ops[')'] = Rclosepar;
- }
- if (regexp_syntax & RE_NO_BK_VBAR)
- regexp_plain_ops['\174'] = Ror;
- else
- regexp_quoted_ops['\174'] = Ror;
- regexp_plain_ops['*'] = Rstar;
- if (regexp_syntax & RE_BK_PLUS_QM)
- {
- regexp_quoted_ops['+'] = Rplus;
- regexp_quoted_ops['?'] = Roptional;
- }
- else
- {
- regexp_plain_ops['+'] = Rplus;
- regexp_plain_ops['?'] = Roptional;
- }
- if (regexp_syntax & RE_NEWLINE_OR)
- regexp_plain_ops['\n'] = Ror;
- regexp_plain_ops['\133'] = Ropenset;
- regexp_plain_ops['\136'] = Rbol;
- regexp_plain_ops['$'] = Reol;
- regexp_plain_ops['.'] = Ranychar;
- if (!(regexp_syntax & RE_NO_GNU_EXTENSIONS))
- {
- regexp_quoted_ops['w'] = Rwordchar;
- regexp_quoted_ops['W'] = Rnotwordchar;
- regexp_quoted_ops['<'] = Rwordbeg;
- regexp_quoted_ops['>'] = Rwordend;
- regexp_quoted_ops['b'] = Rwordbound;
- regexp_quoted_ops['B'] = Rnotwordbound;
- regexp_quoted_ops['`'] = Rbegbuf;
- regexp_quoted_ops['\''] = Rendbuf;
- }
- if (regexp_syntax & RE_ANSI_HEX)
- regexp_quoted_ops['v'] = Rextended_memory;
- for (a = 0; a < Rnum_ops; a++)
- regexp_precedences[a] = 4;
- if (regexp_syntax & RE_TIGHT_VBAR)
- {
- regexp_precedences[Ror] = 3;
- regexp_precedences[Rbol] = 2;
- regexp_precedences[Reol] = 2;
- }
- else
- {
- regexp_precedences[Ror] = 2;
- regexp_precedences[Rbol] = 3;
- regexp_precedences[Reol] = 3;
- }
- regexp_precedences[Rclosepar] = 1;
- regexp_precedences[Rend] = 0;
- regexp_context_indep_ops = (regexp_syntax & RE_CONTEXT_INDEP_OPS) != 0;
- regexp_ansi_sequences = (regexp_syntax & RE_ANSI_HEX) != 0;
-}
-
-int re_set_syntax(int syntax)
-{
- int ret;
-
- ret = regexp_syntax;
- regexp_syntax = syntax;
- re_syntax = syntax; /* Exported copy */
- re_compile_initialize();
- return ret;
-}
-
-static int hex_char_to_decimal(int ch)
-{
- if (ch >= '0' && ch <= '9')
- return ch - '0';
- if (ch >= 'a' && ch <= 'f')
- return ch - 'a' + 10;
- if (ch >= 'A' && ch <= 'F')
- return ch - 'A' + 10;
- return 16;
-}
-
-static void re_compile_fastmap_aux(unsigned char *code, int pos,
- unsigned char *visited,
- unsigned char *can_be_null,
- unsigned char *fastmap)
-{
- int a;
- int b;
- int syntaxcode;
-
- if (visited[pos])
- return; /* we have already been here */
- visited[pos] = 1;
- for (;;)
- switch (code[pos++]) {
- case Cend:
- {
- *can_be_null = 1;
- return;
- }
- case Cbol:
- case Cbegbuf:
- case Cendbuf:
- case Cwordbeg:
- case Cwordend:
- case Cwordbound:
- case Cnotwordbound:
- {
- for (a = 0; a < 256; a++)
- fastmap[a] = 1;
- break;
- }
- case Csyntaxspec:
- {
- syntaxcode = code[pos++];
- for (a = 0; a < 256; a++)
- if (SYNTAX(a) & syntaxcode)
- fastmap[a] = 1;
- return;
- }
- case Cnotsyntaxspec:
- {
- syntaxcode = code[pos++];
- for (a = 0; a < 256; a++)
- if (!(SYNTAX(a) & syntaxcode) )
- fastmap[a] = 1;
- return;
- }
- case Ceol:
- {
- fastmap['\n'] = 1;
- if (*can_be_null == 0)
- *can_be_null = 2; /* can match null, but only at end of buffer*/
- return;
- }
- case Cset:
- {
- for (a = 0; a < 256/8; a++)
- if (code[pos + a] != 0)
- for (b = 0; b < 8; b++)
- if (code[pos + a] & (1 << b))
- fastmap[(a << 3) + b] = 1;
- pos += 256/8;
- return;
- }
- case Cexact:
- {
- fastmap[(unsigned char)code[pos]] = 1;
- return;
- }
- case Canychar:
- {
- for (a = 0; a < 256; a++)
- if (a != '\n')
- fastmap[a] = 1;
- return;
- }
- case Cstart_memory:
- case Cend_memory:
- {
- pos++;
- break;
- }
- case Cmatch_memory:
- {
- for (a = 0; a < 256; a++)
- fastmap[a] = 1;
- *can_be_null = 1;
- return;
- }
- case Cjump:
- case Cdummy_failure_jump:
- case Cupdate_failure_jump:
- case Cstar_jump:
- {
- a = (unsigned char)code[pos++];
- a |= (unsigned char)code[pos++] << 8;
- pos += (int)SHORT(a);
- if (visited[pos])
- {
- /* argh... the regexp contains empty loops. This is not
- good, as this may cause a failure stack overflow when
- matching. Oh well. */
- /* this path leads nowhere; pursue other paths. */
- return;
- }
- visited[pos] = 1;
- break;
- }
- case Cfailure_jump:
- {
- a = (unsigned char)code[pos++];
- a |= (unsigned char)code[pos++] << 8;
- a = pos + (int)SHORT(a);
- re_compile_fastmap_aux(code, a, visited, can_be_null, fastmap);
- break;
- }
- case Crepeat1:
- {
- pos += 2;
- break;
- }
- default:
- {
- PyErr_SetString(PyExc_SystemError, "Unknown regex opcode: memory corrupted?");
- return;
- /*NOTREACHED*/
- }
- }
-}
-
-static int re_do_compile_fastmap(unsigned char *buffer, int used, int pos,
- unsigned char *can_be_null,
- unsigned char *fastmap)
-{
- unsigned char small_visited[512], *visited;
-
- if (used <= sizeof(small_visited))
- visited = small_visited;
- else
- {
- visited = malloc(used);
- if (!visited)
- return 0;
- }
- *can_be_null = 0;
- memset(fastmap, 0, 256);
- memset(visited, 0, used);
- re_compile_fastmap_aux(buffer, pos, visited, can_be_null, fastmap);
- if (visited != small_visited)
- free(visited);
- return 1;
-}
-
-void re_compile_fastmap(regexp_t bufp)
-{
- if (!bufp->fastmap || bufp->fastmap_accurate)
- return;
- assert(bufp->used > 0);
- if (!re_do_compile_fastmap(bufp->buffer,
- bufp->used,
- 0,
- &bufp->can_be_null,
- bufp->fastmap))
- return;
- if (PyErr_Occurred()) return;
- if (bufp->buffer[0] == Cbol)
- bufp->anchor = 1; /* begline */
- else
- if (bufp->buffer[0] == Cbegbuf)
- bufp->anchor = 2; /* begbuf */
- else
- bufp->anchor = 0; /* none */
- bufp->fastmap_accurate = 1;
-}
-
-/*
- * star is coded as:
- * 1: failure_jump 2
- * ... code for operand of star
- * star_jump 1
- * 2: ... code after star
- *
- * We change the star_jump to update_failure_jump if we can determine
- * that it is safe to do so; otherwise we change it to an ordinary
- * jump.
- *
- * plus is coded as
- *
- * jump 2
- * 1: failure_jump 3
- * 2: ... code for operand of plus
- * star_jump 1
- * 3: ... code after plus
- *
- * For star_jump considerations this is processed identically to star.
- *
- */
-
-static int re_optimize_star_jump(regexp_t bufp, unsigned char *code)
-{
- unsigned char map[256];
- unsigned char can_be_null;
- unsigned char *p1;
- unsigned char *p2;
- unsigned char ch;
- int a;
- int b;
- int num_instructions = 0;
-
- a = (unsigned char)*code++;
- a |= (unsigned char)*code++ << 8;
- a = (int)SHORT(a);
-
- p1 = code + a + 3; /* skip the failure_jump */
- /* Check that the jump is within the pattern */
- if (p1<bufp->buffer || bufp->buffer+bufp->used<p1)
- {
- PyErr_SetString(PyExc_SystemError, "Regex VM jump out of bounds (failure_jump opt)");
- return 0;
- }
-
- assert(p1[-3] == Cfailure_jump);
- p2 = code;
- /* p1 points inside loop, p2 points to after loop */
- if (!re_do_compile_fastmap(bufp->buffer, bufp->used,
- (int)(p2 - bufp->buffer),
- &can_be_null, map))
- goto make_normal_jump;
-
- /* If we might introduce a new update point inside the
- * loop, we can't optimize because then update_jump would
- * update a wrong failure point. Thus we have to be
- * quite careful here.
- */
-
- /* loop until we find something that consumes a character */
- loop_p1:
- num_instructions++;
- switch (*p1++)
- {
- case Cbol:
- case Ceol:
- case Cbegbuf:
- case Cendbuf:
- case Cwordbeg:
- case Cwordend:
- case Cwordbound:
- case Cnotwordbound:
- {
- goto loop_p1;
- }
- case Cstart_memory:
- case Cend_memory:
- {
- p1++;
- goto loop_p1;
- }
- case Cexact:
- {
- ch = (unsigned char)*p1++;
- if (map[(int)ch])
- goto make_normal_jump;
- break;
- }
- case Canychar:
- {
- for (b = 0; b < 256; b++)
- if (b != '\n' && map[b])
- goto make_normal_jump;
- break;
- }
- case Cset:
- {
- for (b = 0; b < 256; b++)
- if ((p1[b >> 3] & (1 << (b & 7))) && map[b])
- goto make_normal_jump;
- p1 += 256/8;
- break;
- }
- default:
- {
- goto make_normal_jump;
- }
- }
- /* now we know that we can't backtrack. */
- while (p1 != p2 - 3)
- {
- num_instructions++;
- switch (*p1++)
- {
- case Cend:
- {
- return 0;
- }
- case Cbol:
- case Ceol:
- case Canychar:
- case Cbegbuf:
- case Cendbuf:
- case Cwordbeg:
- case Cwordend:
- case Cwordbound:
- case Cnotwordbound:
- {
- break;
- }
- case Cset:
- {
- p1 += 256/8;
- break;
- }
- case Cexact:
- case Cstart_memory:
- case Cend_memory:
- case Cmatch_memory:
- case Csyntaxspec:
- case Cnotsyntaxspec:
- {
- p1++;
- break;
- }
- case Cjump:
- case Cstar_jump:
- case Cfailure_jump:
- case Cupdate_failure_jump:
- case Cdummy_failure_jump:
- {
- goto make_normal_jump;
- }
- default:
- {
- return 0;
- }
- }
- }
-
- /* make_update_jump: */
- code -= 3;
- a += 3; /* jump to after the Cfailure_jump */
- code[0] = Cupdate_failure_jump;
- code[1] = a & 0xff;
- code[2] = a >> 8;
- if (num_instructions > 1)
- return 1;
- assert(num_instructions == 1);
- /* if the only instruction matches a single character, we can do
- * better */
- p1 = code + 3 + a; /* start of sole instruction */
- if (*p1 == Cset || *p1 == Cexact || *p1 == Canychar ||
- *p1 == Csyntaxspec || *p1 == Cnotsyntaxspec)
- code[0] = Crepeat1;
- return 1;
-
- make_normal_jump:
- code -= 3;
- *code = Cjump;
- return 1;
-}
-
-static int re_optimize(regexp_t bufp)
-{
- unsigned char *code;
-
- code = bufp->buffer;
-
- while(1)
- {
- switch (*code++)
- {
- case Cend:
- {
- return 1;
- }
- case Canychar:
- case Cbol:
- case Ceol:
- case Cbegbuf:
- case Cendbuf:
- case Cwordbeg:
- case Cwordend:
- case Cwordbound:
- case Cnotwordbound:
- {
- break;
- }
- case Cset:
- {
- code += 256/8;
- break;
- }
- case Cexact:
- case Cstart_memory:
- case Cend_memory:
- case Cmatch_memory:
- case Csyntaxspec:
- case Cnotsyntaxspec:
- {
- code++;
- break;
- }
- case Cstar_jump:
- {
- if (!re_optimize_star_jump(bufp, code))
- {
- return 0;
- }
- /* fall through */
- }
- case Cupdate_failure_jump:
- case Cjump:
- case Cdummy_failure_jump:
- case Cfailure_jump:
- case Crepeat1:
- {
- code += 2;
- break;
- }
- default:
- {
- return 0;
- }
- }
- }
-}
-
-#define NEXTCHAR(var) \
-{ \
- if (pos >= size) \
- goto ends_prematurely; \
- (var) = regex[pos]; \
- pos++; \
-}
-
-#define ALLOC(amount) \
-{ \
- if (pattern_offset+(amount) > alloc) \
- { \
- alloc += 256 + (amount); \
- pattern = realloc(pattern, alloc); \
- if (!pattern) \
- goto out_of_memory; \
- } \
-}
-
-#define STORE(ch) pattern[pattern_offset++] = (ch)
-
-#define CURRENT_LEVEL_START (starts[starts_base + current_level])
-
-#define SET_LEVEL_START starts[starts_base + current_level] = pattern_offset
-
-#define PUSH_LEVEL_STARTS \
-if (starts_base < (MAX_NESTING-1)*NUM_LEVELS) \
- starts_base += NUM_LEVELS; \
-else \
- goto too_complex \
-
-#define POP_LEVEL_STARTS starts_base -= NUM_LEVELS
-
-#define PUT_ADDR(offset,addr) \
-{ \
- int disp = (addr) - (offset) - 2; \
- pattern[(offset)] = disp & 0xff; \
- pattern[(offset)+1] = (disp>>8) & 0xff; \
-}
-
-#define INSERT_JUMP(pos,type,addr) \
-{ \
- int a, p = (pos), t = (type), ad = (addr); \
- for (a = pattern_offset - 1; a >= p; a--) \
- pattern[a + 3] = pattern[a]; \
- pattern[p] = t; \
- PUT_ADDR(p+1,ad); \
- pattern_offset += 3; \
-}
-
-#define SETBIT(buf,offset,bit) (buf)[(offset)+(bit)/8] |= (1<<((bit) & 7))
-
-#define SET_FIELDS \
-{ \
- bufp->allocated = alloc; \
- bufp->buffer = pattern; \
- bufp->used = pattern_offset; \
-}
-
-#define GETHEX(var) \
-{ \
- unsigned char gethex_ch, gethex_value; \
- NEXTCHAR(gethex_ch); \
- gethex_value = hex_char_to_decimal(gethex_ch); \
- if (gethex_value == 16) \
- goto hex_error; \
- NEXTCHAR(gethex_ch); \
- gethex_ch = hex_char_to_decimal(gethex_ch); \
- if (gethex_ch == 16) \
- goto hex_error; \
- (var) = gethex_value * 16 + gethex_ch; \
-}
-
-#define ANSI_TRANSLATE(ch) \
-{ \
- switch (ch) \
- { \
- case 'a': \
- case 'A': \
- { \
- ch = 7; /* audible bell */ \
- break; \
- } \
- case 'b': \
- case 'B': \
- { \
- ch = 8; /* backspace */ \
- break; \
- } \
- case 'f': \
- case 'F': \
- { \
- ch = 12; /* form feed */ \
- break; \
- } \
- case 'n': \
- case 'N': \
- { \
- ch = 10; /* line feed */ \
- break; \
- } \
- case 'r': \
- case 'R': \
- { \
- ch = 13; /* carriage return */ \
- break; \
- } \
- case 't': \
- case 'T': \
- { \
- ch = 9; /* tab */ \
- break; \
- } \
- case 'v': \
- case 'V': \
- { \
- ch = 11; /* vertical tab */ \
- break; \
- } \
- case 'x': /* hex code */ \
- case 'X': \
- { \
- GETHEX(ch); \
- break; \
- } \
- default: \
- { \
- /* other characters passed through */ \
- if (translate) \
- ch = translate[(unsigned char)ch]; \
- break; \
- } \
- } \
-}
-
-char *re_compile_pattern(unsigned char *regex, int size, regexp_t bufp)
-{
- int a;
- int pos;
- int op;
- int current_level;
- int level;
- int opcode;
- int pattern_offset = 0, alloc;
- int starts[NUM_LEVELS * MAX_NESTING];
- int starts_base;
- int future_jumps[MAX_NESTING];
- int num_jumps;
- unsigned char ch = '\0';
- unsigned char *pattern;
- unsigned char *translate;
- int next_register;
- int paren_depth;
- int num_open_registers;
- int open_registers[RE_NREGS];
- int beginning_context;
-
- if (!re_compile_initialized)
- re_compile_initialize();
- bufp->used = 0;
- bufp->fastmap_accurate = 0;
- bufp->uses_registers = 1;
- bufp->num_registers = 1;
- translate = bufp->translate;
- pattern = bufp->buffer;
- alloc = bufp->allocated;
- if (alloc == 0 || pattern == NULL)
- {
- alloc = 256;
- pattern = malloc(alloc);
- if (!pattern)
- goto out_of_memory;
- }
- pattern_offset = 0;
- starts_base = 0;
- num_jumps = 0;
- current_level = 0;
- SET_LEVEL_START;
- num_open_registers = 0;
- next_register = 1;
- paren_depth = 0;
- beginning_context = 1;
- op = -1;
- /* we use Rend dummy to ensure that pending jumps are updated
- (due to low priority of Rend) before exiting the loop. */
- pos = 0;
- while (op != Rend)
- {
- if (pos >= size)
- op = Rend;
- else
- {
- NEXTCHAR(ch);
- if (translate)
- ch = translate[(unsigned char)ch];
- op = regexp_plain_ops[(unsigned char)ch];
- if (op == Rquote)
- {
- NEXTCHAR(ch);
- op = regexp_quoted_ops[(unsigned char)ch];
- if (op == Rnormal && regexp_ansi_sequences)
- ANSI_TRANSLATE(ch);
- }
- }
- level = regexp_precedences[op];
- /* printf("ch='%c' op=%d level=%d current_level=%d
- curlevstart=%d\n", ch, op, level, current_level,
- CURRENT_LEVEL_START); */
- if (level > current_level)
- {
- for (current_level++; current_level < level; current_level++)
- SET_LEVEL_START;
- SET_LEVEL_START;
- }
- else
- if (level < current_level)
- {
- current_level = level;
- for (;num_jumps > 0 &&
- future_jumps[num_jumps-1] >= CURRENT_LEVEL_START;
- num_jumps--)
- PUT_ADDR(future_jumps[num_jumps-1], pattern_offset);
- }
- switch (op)
- {
- case Rend:
- {
- break;
- }
- case Rnormal:
- {
- normal_char:
- opcode = Cexact;
- store_opcode_and_arg: /* opcode & ch must be set */
- SET_LEVEL_START;
- ALLOC(2);
- STORE(opcode);
- STORE(ch);
- break;
- }
- case Ranychar:
- {
- opcode = Canychar;
- store_opcode:
- SET_LEVEL_START;
- ALLOC(1);
- STORE(opcode);
- break;
- }
- case Rquote:
- {
- Py_FatalError("Rquote");
- /*NOTREACHED*/
- }
- case Rbol:
- {
- if (!beginning_context) {
- if (regexp_context_indep_ops)
- goto op_error;
- else
- goto normal_char;
- }
- opcode = Cbol;
- goto store_opcode;
- }
- case Reol:
- {
- if (!((pos >= size) ||
- ((regexp_syntax & RE_NO_BK_VBAR) ?
- (regex[pos] == '\174') :
- (pos+1 < size && regex[pos] == '\134' &&
- regex[pos+1] == '\174')) ||
- ((regexp_syntax & RE_NO_BK_PARENS)?
- (regex[pos] == ')'):
- (pos+1 < size && regex[pos] == '\134' &&
- regex[pos+1] == ')')))) {
- if (regexp_context_indep_ops)
- goto op_error;
- else
- goto normal_char;
- }
- opcode = Ceol;
- goto store_opcode;
- /* NOTREACHED */
- break;
- }
- case Roptional:
- {
- if (beginning_context) {
- if (regexp_context_indep_ops)
- goto op_error;
- else
- goto normal_char;
- }
- if (CURRENT_LEVEL_START == pattern_offset)
- break; /* ignore empty patterns for ? */
- ALLOC(3);
- INSERT_JUMP(CURRENT_LEVEL_START, Cfailure_jump,
- pattern_offset + 3);
- break;
- }
- case Rstar:
- case Rplus:
- {
- if (beginning_context) {
- if (regexp_context_indep_ops)
- goto op_error;
- else
- goto normal_char;
- }
- if (CURRENT_LEVEL_START == pattern_offset)
- break; /* ignore empty patterns for + and * */
- ALLOC(9);
- INSERT_JUMP(CURRENT_LEVEL_START, Cfailure_jump,
- pattern_offset + 6);
- INSERT_JUMP(pattern_offset, Cstar_jump, CURRENT_LEVEL_START);
- if (op == Rplus) /* jump over initial failure_jump */
- INSERT_JUMP(CURRENT_LEVEL_START, Cdummy_failure_jump,
- CURRENT_LEVEL_START + 6);
- break;
- }
- case Ror:
- {
- ALLOC(6);
- INSERT_JUMP(CURRENT_LEVEL_START, Cfailure_jump,
- pattern_offset + 6);
- if (num_jumps >= MAX_NESTING)
- goto too_complex;
- STORE(Cjump);
- future_jumps[num_jumps++] = pattern_offset;
- STORE(0);
- STORE(0);
- SET_LEVEL_START;
- break;
- }
- case Ropenpar:
- {
- SET_LEVEL_START;
- if (next_register < RE_NREGS)
- {
- bufp->uses_registers = 1;
- ALLOC(2);
- STORE(Cstart_memory);
- STORE(next_register);
- open_registers[num_open_registers++] = next_register;
- bufp->num_registers++;
- next_register++;
- }
- paren_depth++;
- PUSH_LEVEL_STARTS;
- current_level = 0;
- SET_LEVEL_START;
- break;
- }
- case Rclosepar:
- {
- if (paren_depth <= 0)
- goto parenthesis_error;
- POP_LEVEL_STARTS;
- current_level = regexp_precedences[Ropenpar];
- paren_depth--;
- if (paren_depth < num_open_registers)
- {
- bufp->uses_registers = 1;
- ALLOC(2);
- STORE(Cend_memory);
- num_open_registers--;
- STORE(open_registers[num_open_registers]);
- }
- break;
- }
- case Rmemory:
- {
- if (ch == '0')
- goto bad_match_register;
- assert(ch >= '0' && ch <= '9');
- bufp->uses_registers = 1;
- opcode = Cmatch_memory;
- ch -= '0';
- goto store_opcode_and_arg;
- }
- case Rextended_memory:
- {
- NEXTCHAR(ch);
- if (ch < '0' || ch > '9')
- goto bad_match_register;
- NEXTCHAR(a);
- if (a < '0' || a > '9')
- goto bad_match_register;
- ch = 10 * (a - '0') + ch - '0';
- if (ch == 0 || ch >= RE_NREGS)
- goto bad_match_register;
- bufp->uses_registers = 1;
- opcode = Cmatch_memory;
- goto store_opcode_and_arg;
- }
- case Ropenset:
- {
- int complement;
- int prev;
- int offset;
- int range;
- int firstchar;
-
- SET_LEVEL_START;
- ALLOC(1+256/8);
- STORE(Cset);
- offset = pattern_offset;
- for (a = 0; a < 256/8; a++)
- STORE(0);
- NEXTCHAR(ch);
- if (translate)
- ch = translate[(unsigned char)ch];
- if (ch == '\136')
- {
- complement = 1;
- NEXTCHAR(ch);
- if (translate)
- ch = translate[(unsigned char)ch];
- }
- else
- complement = 0;
- prev = -1;
- range = 0;
- firstchar = 1;
- while (ch != '\135' || firstchar)
- {
- firstchar = 0;
- if (regexp_ansi_sequences && ch == '\134')
- {
- NEXTCHAR(ch);
- ANSI_TRANSLATE(ch);
- }
- if (range)
- {
- for (a = prev; a <= (int)ch; a++)
- SETBIT(pattern, offset, a);
- prev = -1;
- range = 0;
- }
- else
- if (prev != -1 && ch == '-')
- range = 1;
- else
- {
- SETBIT(pattern, offset, ch);
- prev = ch;
- }
- NEXTCHAR(ch);
- if (translate)
- ch = translate[(unsigned char)ch];
- }
- if (range)
- SETBIT(pattern, offset, '-');
- if (complement)
- {
- for (a = 0; a < 256/8; a++)
- pattern[offset+a] ^= 0xff;
- }
- break;
- }
- case Rbegbuf:
- {
- opcode = Cbegbuf;
- goto store_opcode;
- }
- case Rendbuf:
- {
- opcode = Cendbuf;
- goto store_opcode;
- }
- case Rwordchar:
- {
- opcode = Csyntaxspec;
- ch = Sword;
- goto store_opcode_and_arg;
- }
- case Rnotwordchar:
- {
- opcode = Cnotsyntaxspec;
- ch = Sword;
- goto store_opcode_and_arg;
- }
- case Rwordbeg:
- {
- opcode = Cwordbeg;
- goto store_opcode;
- }
- case Rwordend:
- {
- opcode = Cwordend;
- goto store_opcode;
- }
- case Rwordbound:
- {
- opcode = Cwordbound;
- goto store_opcode;
- }
- case Rnotwordbound:
- {
- opcode = Cnotwordbound;
- goto store_opcode;
- }
- default:
- {
- abort();
- }
- }
- beginning_context = (op == Ropenpar || op == Ror);
- }
- if (starts_base != 0)
- goto parenthesis_error;
- assert(num_jumps == 0);
- ALLOC(1);
- STORE(Cend);
- SET_FIELDS;
- if(!re_optimize(bufp))
- return "Optimization error";
- return NULL;
-
- op_error:
- SET_FIELDS;
- return "Badly placed special character";
-
- bad_match_register:
- SET_FIELDS;
- return "Bad match register number";
-
- hex_error:
- SET_FIELDS;
- return "Bad hexadecimal number";
-
- parenthesis_error:
- SET_FIELDS;
- return "Badly placed parenthesis";
-
- out_of_memory:
- SET_FIELDS;
- return "Out of memory";
-
- ends_prematurely:
- SET_FIELDS;
- return "Regular expression ends prematurely";
-
- too_complex:
- SET_FIELDS;
- return "Regular expression too complex";
-}
-
-#undef CHARAT
-#undef NEXTCHAR
-#undef GETHEX
-#undef ALLOC
-#undef STORE
-#undef CURRENT_LEVEL_START
-#undef SET_LEVEL_START
-#undef PUSH_LEVEL_STARTS
-#undef POP_LEVEL_STARTS
-#undef PUT_ADDR
-#undef INSERT_JUMP
-#undef SETBIT
-#undef SET_FIELDS
-
-#define PREFETCH if (text == textend) goto fail
-
-#define NEXTCHAR(var) \
-PREFETCH; \
-var = (unsigned char)*text++; \
-if (translate) \
- var = translate[var]
-
-int re_match(regexp_t bufp, unsigned char *string, int size, int pos,
- regexp_registers_t old_regs)
-{
- unsigned char *code;
- unsigned char *translate;
- unsigned char *text;
- unsigned char *textstart;
- unsigned char *textend;
- int a;
- int b;
- int ch;
- int reg;
- int match_end;
- unsigned char *regstart;
- unsigned char *regend;
- int regsize;
- match_state state;
-
- assert(pos >= 0 && size >= 0);
- assert(pos <= size);
-
- text = string + pos;
- textstart = string;
- textend = string + size;
-
- code = bufp->buffer;
-
- translate = bufp->translate;
-
- NEW_STATE(state, bufp->num_registers);
-
- continue_matching:
- switch (*code++)
- {
- case Cend:
- {
- match_end = text - textstart;
- if (old_regs)
- {
- old_regs->start[0] = pos;
- old_regs->end[0] = match_end;
- if (!bufp->uses_registers)
- {
- for (a = 1; a < RE_NREGS; a++)
- {
- old_regs->start[a] = -1;
- old_regs->end[a] = -1;
- }
- }
- else
- {
- for (a = 1; a < bufp->num_registers; a++)
- {
- if ((GET_REG_START(state, a) == NULL) ||
- (GET_REG_END(state, a) == NULL))
- {
- old_regs->start[a] = -1;
- old_regs->end[a] = -1;
- continue;
- }
- old_regs->start[a] = GET_REG_START(state, a) - textstart;
- old_regs->end[a] = GET_REG_END(state, a) - textstart;
- }
- for (; a < RE_NREGS; a++)
- {
- old_regs->start[a] = -1;
- old_regs->end[a] = -1;
- }
- }
- }
- FREE_STATE(state);
- return match_end - pos;
- }
- case Cbol:
- {
- if (text == textstart || text[-1] == '\n')
- goto continue_matching;
- goto fail;
- }
- case Ceol:
- {
- if (text == textend || *text == '\n')
- goto continue_matching;
- goto fail;
- }
- case Cset:
- {
- NEXTCHAR(ch);
- if (code[ch/8] & (1<<(ch & 7)))
- {
- code += 256/8;
- goto continue_matching;
- }
- goto fail;
- }
- case Cexact:
- {
- NEXTCHAR(ch);
- if (ch != (unsigned char)*code++)
- goto fail;
- goto continue_matching;
- }
- case Canychar:
- {
- NEXTCHAR(ch);
- if (ch == '\n')
- goto fail;
- goto continue_matching;
- }
- case Cstart_memory:
- {
- reg = *code++;
- SET_REG_START(state, reg, text, goto error);
- goto continue_matching;
- }
- case Cend_memory:
- {
- reg = *code++;
- SET_REG_END(state, reg, text, goto error);
- goto continue_matching;
- }
- case Cmatch_memory:
- {
- reg = *code++;
- regstart = GET_REG_START(state, reg);
- regend = GET_REG_END(state, reg);
- if ((regstart == NULL) || (regend == NULL))
- goto fail; /* or should we just match nothing? */
- regsize = regend - regstart;
-
- if (regsize > (textend - text))
- goto fail;
- if(translate)
- {
- for (; regstart < regend; regstart++, text++)
- if (translate[*regstart] != translate[*text])
- goto fail;
- }
- else
- for (; regstart < regend; regstart++, text++)
- if (*regstart != *text)
- goto fail;
- goto continue_matching;
- }
- case Cupdate_failure_jump:
- {
- UPDATE_FAILURE(state, text, goto error);
- /* fall to next case */
- }
- /* treat Cstar_jump just like Cjump if it hasn't been optimized */
- case Cstar_jump:
- case Cjump:
- {
- a = (unsigned char)*code++;
- a |= (unsigned char)*code++ << 8;
- code += (int)SHORT(a);
- if (code<bufp->buffer || bufp->buffer+bufp->used<code) {
- PyErr_SetString(PyExc_SystemError, "Regex VM jump out of bounds (Cjump)");
- FREE_STATE(state);
- return -2;
- }
- goto continue_matching;
- }
- case Cdummy_failure_jump:
- {
- unsigned char *failuredest;
-
- a = (unsigned char)*code++;
- a |= (unsigned char)*code++ << 8;
- a = (int)SHORT(a);
- assert(*code == Cfailure_jump);
- b = (unsigned char)code[1];
- b |= (unsigned char)code[2] << 8;
- failuredest = code + (int)SHORT(b) + 3;
- if (failuredest<bufp->buffer || bufp->buffer+bufp->used < failuredest) {
- PyErr_SetString(PyExc_SystemError, "Regex VM jump out of bounds (Cdummy_failure_jump failuredest)");
- FREE_STATE(state);
- return -2;
- }
- PUSH_FAILURE(state, failuredest, NULL, goto error);
- code += a;
- if (code<bufp->buffer || bufp->buffer+bufp->used < code) {
- PyErr_SetString(PyExc_SystemError, "Regex VM jump out of bounds (Cdummy_failure_jump code)");
- FREE_STATE(state);
- return -2;
- }
- goto continue_matching;
- }
- case Cfailure_jump:
- {
- a = (unsigned char)*code++;
- a |= (unsigned char)*code++ << 8;
- a = (int)SHORT(a);
- if (code+a<bufp->buffer || bufp->buffer+bufp->used < code+a) {
- PyErr_SetString(PyExc_SystemError, "Regex VM jump out of bounds (Cfailure_jump)");
- FREE_STATE(state);
- return -2;
- }
- PUSH_FAILURE(state, code + a, text, goto error);
- goto continue_matching;
- }
- case Crepeat1:
- {
- unsigned char *pinst;
- a = (unsigned char)*code++;
- a |= (unsigned char)*code++ << 8;
- a = (int)SHORT(a);
- pinst = code + a;
- if (pinst<bufp->buffer || bufp->buffer+bufp->used<pinst) {
- PyErr_SetString(PyExc_SystemError, "Regex VM jump out of bounds (Crepeat1)");
- FREE_STATE(state);
- return -2;
- }
- /* pinst is sole instruction in loop, and it matches a
- * single character. Since Crepeat1 was originally a
- * Cupdate_failure_jump, we also know that backtracking
- * is useless: so long as the single-character
- * expression matches, it must be used. Also, in the
- * case of +, we've already matched one character, so +
- * can't fail: nothing here can cause a failure. */
- switch (*pinst++)
- {
- case Cset:
- {
- if (translate)
- {
- while (text < textend)
- {
- ch = translate[(unsigned char)*text];
- if (pinst[ch/8] & (1<<(ch & 7)))
- text++;
- else
- break;
- }
- }
- else
- {
- while (text < textend)
- {
- ch = (unsigned char)*text;
- if (pinst[ch/8] & (1<<(ch & 7)))
- text++;
- else
- break;
- }
- }
- break;
- }
- case Cexact:
- {
- ch = (unsigned char)*pinst;
- if (translate)
- {
- while (text < textend &&
- translate[(unsigned char)*text] == ch)
- text++;
- }
- else
- {
- while (text < textend && (unsigned char)*text == ch)
- text++;
- }
- break;
- }
- case Canychar:
- {
- while (text < textend && (unsigned char)*text != '\n')
- text++;
- break;
- }
- case Csyntaxspec:
- {
- a = (unsigned char)*pinst;
- if (translate)
- {
- while (text < textend &&
- (SYNTAX(translate[*text]) & a) )
- text++;
- }
- else
- {
- while (text < textend && (SYNTAX(*text) & a) )
- text++;
- }
- break;
- }
- case Cnotsyntaxspec:
- {
- a = (unsigned char)*pinst;
- if (translate)
- {
- while (text < textend &&
- !(SYNTAX(translate[*text]) & a) )
- text++;
- }
- else
- {
- while (text < textend && !(SYNTAX(*text) & a) )
- text++;
- }
- break;
- }
- default:
- {
- FREE_STATE(state);
- PyErr_SetString(PyExc_SystemError, "Unknown regex opcode: memory corrupted?");
- return -2;
- /*NOTREACHED*/
- }
- }
- /* due to the funky way + and * are compiled, the top
- * failure- stack entry at this point is actually a
- * success entry -- update it & pop it */
- UPDATE_FAILURE(state, text, goto error);
- goto fail; /* i.e., succeed <wink/sigh> */
- }
- case Cbegbuf:
- {
- if (text == textstart)
- goto continue_matching;
- goto fail;
- }
- case Cendbuf:
- {
- if (text == textend)
- goto continue_matching;
- goto fail;
- }
- case Cwordbeg:
- {
- if (text == textend)
- goto fail;
- if (!(SYNTAX(*text) & Sword))
- goto fail;
- if (text == textstart)
- goto continue_matching;
- if (!(SYNTAX(text[-1]) & Sword))
- goto continue_matching;
- goto fail;
- }
- case Cwordend:
- {
- if (text == textstart)
- goto fail;
- if (!(SYNTAX(text[-1]) & Sword))
- goto fail;
- if (text == textend)
- goto continue_matching;
- if (!(SYNTAX(*text) & Sword))
- goto continue_matching;
- goto fail;
- }
- case Cwordbound:
- {
- /* Note: as in gnu regexp, this also matches at the
- * beginning and end of buffer. */
-
- if (text == textstart || text == textend)
- goto continue_matching;
- if ((SYNTAX(text[-1]) & Sword) ^ (SYNTAX(*text) & Sword))
- goto continue_matching;
- goto fail;
- }
- case Cnotwordbound:
- {
- /* Note: as in gnu regexp, this never matches at the
- * beginning and end of buffer. */
- if (text == textstart || text == textend)
- goto fail;
- if (!((SYNTAX(text[-1]) & Sword) ^ (SYNTAX(*text) & Sword)))
- goto continue_matching;
- goto fail;
- }
- case Csyntaxspec:
- {
- NEXTCHAR(ch);
- if (!(SYNTAX(ch) & (unsigned char)*code++))
- goto fail;
- goto continue_matching;
- }
- case Cnotsyntaxspec:
- {
- NEXTCHAR(ch);
- if (SYNTAX(ch) & (unsigned char)*code++)
- goto fail;
- goto continue_matching;
- }
- default:
- {
- FREE_STATE(state);
- PyErr_SetString(PyExc_SystemError, "Unknown regex opcode: memory corrupted?");
- return -2;
- /*NOTREACHED*/
- }
- }
-
-
-
-#if 0 /* This line is never reached --Guido */
- abort();
-#endif
- /*
- *NOTREACHED
- */
-
- /* Using "break;" in the above switch statement is equivalent to "goto fail;" */
- fail:
- POP_FAILURE(state, code, text, goto done_matching, goto error);
- goto continue_matching;
-
- done_matching:
-/* if(translated != NULL) */
-/* free(translated); */
- FREE_STATE(state);
- return -1;
-
- error:
-/* if (translated != NULL) */
-/* free(translated); */
- FREE_STATE(state);
- return -2;
-}
-
-
-#undef PREFETCH
-#undef NEXTCHAR
-
-int re_search(regexp_t bufp, unsigned char *string, int size, int pos,
- int range, regexp_registers_t regs)
-{
- unsigned char *fastmap;
- unsigned char *translate;
- unsigned char *text;
- unsigned char *partstart;
- unsigned char *partend;
- int dir;
- int ret;
- unsigned char anchor;
-
- assert(size >= 0 && pos >= 0);
- assert(pos + range >= 0 && pos + range <= size); /* Bugfix by ylo */
-
- fastmap = bufp->fastmap;
- translate = bufp->translate;
- if (fastmap && !bufp->fastmap_accurate) {
- re_compile_fastmap(bufp);
- if (PyErr_Occurred()) return -2;
- }
-
- anchor = bufp->anchor;
- if (bufp->can_be_null == 1) /* can_be_null == 2: can match null at eob */
- fastmap = NULL;
-
- if (range < 0)
- {
- dir = -1;
- range = -range;
- }
- else
- dir = 1;
-
- if (anchor == 2) {
- if (pos != 0)
- return -1;
- else
- range = 0;
- }
-
- for (; range >= 0; range--, pos += dir)
- {
- if (fastmap)
- {
- if (dir == 1)
- { /* searching forwards */
-
- text = string + pos;
- partend = string + size;
- partstart = text;
- if (translate)
- while (text != partend &&
- !fastmap[(unsigned char) translate[(unsigned char)*text]])
- text++;
- else
- while (text != partend && !fastmap[(unsigned char)*text])
- text++;
- pos += text - partstart;
- range -= text - partstart;
- if (pos == size && bufp->can_be_null == 0)
- return -1;
- }
- else
- { /* searching backwards */
- text = string + pos;
- partstart = string + pos - range;
- partend = text;
- if (translate)
- while (text != partstart &&
- !fastmap[(unsigned char)
- translate[(unsigned char)*text]])
- text--;
- else
- while (text != partstart &&
- !fastmap[(unsigned char)*text])
- text--;
- pos -= partend - text;
- range -= partend - text;
- }
- }
- if (anchor == 1)
- { /* anchored to begline */
- if (pos > 0 && (string[pos - 1] != '\n'))
- continue;
- }
- assert(pos >= 0 && pos <= size);
- ret = re_match(bufp, string, size, pos, regs);
- if (ret >= 0)
- return pos;
- if (ret == -2)
- return -2;
- }
- return -1;
-}
-
-/*
-** Local Variables:
-** mode: c
-** c-file-style: "python"
-** End:
-*/
+++ /dev/null
-/*
- * -*- mode: c-mode; c-file-style: python -*-
- */
-
-#ifndef Py_REGEXPR_H
-#define Py_REGEXPR_H
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/*
- * regexpr.h
- *
- * Author: Tatu Ylonen <ylo@ngs.fi>
- *
- * Copyright (c) 1991 Tatu Ylonen, Espoo, Finland
- *
- * Permission to use, copy, modify, distribute, and sell this software
- * and its documentation for any purpose is hereby granted without fee,
- * provided that the above copyright notice appear in all copies. This
- * software is provided "as is" without express or implied warranty.
- *
- * Created: Thu Sep 26 17:15:36 1991 ylo
- * Last modified: Mon Nov 4 15:49:46 1991 ylo
- */
-
-/* $Id$ */
-
-#ifndef REGEXPR_H
-#define REGEXPR_H
-
-#define RE_NREGS 100 /* number of registers available */
-
-typedef struct re_pattern_buffer
-{
- unsigned char *buffer; /* compiled pattern */
- int allocated; /* allocated size of compiled pattern */
- int used; /* actual length of compiled pattern */
- unsigned char *fastmap; /* fastmap[ch] is true if ch can start pattern */
- unsigned char *translate; /* translation to apply during compilation/matching */
- unsigned char fastmap_accurate; /* true if fastmap is valid */
- unsigned char can_be_null; /* true if can match empty string */
- unsigned char uses_registers; /* registers are used and need to be initialized */
- int num_registers; /* number of registers used */
- unsigned char anchor; /* anchor: 0=none 1=begline 2=begbuf */
-} *regexp_t;
-
-typedef struct re_registers
-{
- int start[RE_NREGS]; /* start offset of region */
- int end[RE_NREGS]; /* end offset of region */
-} *regexp_registers_t;
-
-/* bit definitions for syntax */
-#define RE_NO_BK_PARENS 1 /* no quoting for parentheses */
-#define RE_NO_BK_VBAR 2 /* no quoting for vertical bar */
-#define RE_BK_PLUS_QM 4 /* quoting needed for + and ? */
-#define RE_TIGHT_VBAR 8 /* | binds tighter than ^ and $ */
-#define RE_NEWLINE_OR 16 /* treat newline as or */
-#define RE_CONTEXT_INDEP_OPS 32 /* ^$?*+ are special in all contexts */
-#define RE_ANSI_HEX 64 /* ansi sequences (\n etc) and \xhh */
-#define RE_NO_GNU_EXTENSIONS 128 /* no gnu extensions */
-
-/* definitions for some common regexp styles */
-#define RE_SYNTAX_AWK (RE_NO_BK_PARENS|RE_NO_BK_VBAR|RE_CONTEXT_INDEP_OPS)
-#define RE_SYNTAX_EGREP (RE_SYNTAX_AWK|RE_NEWLINE_OR)
-#define RE_SYNTAX_GREP (RE_BK_PLUS_QM|RE_NEWLINE_OR)
-#define RE_SYNTAX_EMACS 0
-
-#define Sword 1
-#define Swhitespace 2
-#define Sdigit 4
-#define Soctaldigit 8
-#define Shexdigit 16
-
-/* Rename all exported symbols to avoid conflicts with similarly named
- symbols in some systems' standard C libraries... */
-
-#define re_syntax _Py_re_syntax
-#define re_syntax_table _Py_re_syntax_table
-#define re_compile_initialize _Py_re_compile_initialize
-#define re_set_syntax _Py_re_set_syntax
-#define re_compile_pattern _Py_re_compile_pattern
-#define re_match _Py_re_match
-#define re_search _Py_re_search
-#define re_compile_fastmap _Py_re_compile_fastmap
-#define re_comp _Py_re_comp
-#define re_exec _Py_re_exec
-
-#ifdef HAVE_PROTOTYPES
-
-extern int re_syntax;
-/* This is the actual syntax mask. It was added so that Python could do
- * syntax-dependent munging of patterns before compilation. */
-
-extern unsigned char re_syntax_table[256];
-
-void re_compile_initialize(void);
-
-int re_set_syntax(int syntax);
-/* This sets the syntax to use and returns the previous syntax. The
- * syntax is specified by a bit mask of the above defined bits. */
-
-char *re_compile_pattern(unsigned char *regex, int regex_size, regexp_t compiled);
-/* This compiles the regexp (given in regex and length in regex_size).
- * This returns NULL if the regexp compiled successfully, and an error
- * message if an error was encountered. The buffer field must be
- * initialized to a memory area allocated by malloc (or to NULL) before
- * use, and the allocated field must be set to its length (or 0 if
- * buffer is NULL). Also, the translate field must be set to point to a
- * valid translation table, or NULL if it is not used. */
-
-int re_match(regexp_t compiled, unsigned char *string, int size, int pos,
- regexp_registers_t old_regs);
-/* This tries to match the regexp against the string. This returns the
- * length of the matched portion, or -1 if the pattern could not be
- * matched and -2 if an error (such as failure stack overflow) is
- * encountered. */
-
-int re_search(regexp_t compiled, unsigned char *string, int size, int startpos,
- int range, regexp_registers_t regs);
-/* This searches for a substring matching the regexp. This returns the
- * first index at which a match is found. range specifies at how many
- * positions to try matching; positive values indicate searching
- * forwards, and negative values indicate searching backwards. mstop
- * specifies the offset beyond which a match must not go. This returns
- * -1 if no match is found, and -2 if an error (such as failure stack
- * overflow) is encountered. */
-
-void re_compile_fastmap(regexp_t compiled);
-/* This computes the fastmap for the regexp. For this to have any effect,
- * the calling program must have initialized the fastmap field to point
- * to an array of 256 characters. */
-
-#else /* HAVE_PROTOTYPES */
-
-extern int re_syntax;
-extern unsigned char re_syntax_table[256];
-void re_compile_initialize();
-int re_set_syntax();
-char *re_compile_pattern();
-int re_match();
-int re_search();
-void re_compile_fastmap();
-
-#endif /* HAVE_PROTOTYPES */
-
-#endif /* REGEXPR_H */
-
-
-
-#ifdef __cplusplus
-}
-#endif
-#endif /* !Py_REGEXPR_H */
# End Source File\r
# Begin Source File\r
\r
-SOURCE=..\..\Modules\regexmodule.c\r
-# End Source File\r
-# Begin Source File\r
-\r
-SOURCE=..\..\Modules\regexpr.c\r
-# End Source File\r
-# Begin Source File\r
-\r
SOURCE=..\..\Modules\rgbimgmodule.c\r
# End Source File\r
# Begin Source File\r
Modules/md5module.c \
Modules/operator.c \
Modules/_randommodule.c \
- Modules/regexmodule.c \
- Modules/regexpr.c \
Modules/rgbimgmodule.c \
Modules/shamodule.c \
Modules/_sre.c \
$(PY_INCLUDE)\sliceobject.h $(PY_INCLUDE)\stringobject.h \
$(PY_INCLUDE)\sysmodule.h $(PY_INCLUDE)\traceback.h $(PY_INCLUDE)\tupleobject.h
-regexmodule.obj: $(PY_INCLUDE)\abstract.h $(PY_INCLUDE)\ceval.h \
- $(PY_INCLUDE)\classobject.h $(PY_INCLUDE)\cobject.h $(PY_INCLUDE)\complexobject.h \
- pyconfig.h $(PY_INCLUDE)\dictobject.h $(PY_INCLUDE)\fileobject.h \
- $(PY_INCLUDE)\floatobject.h $(PY_INCLUDE)\funcobject.h $(PY_INCLUDE)\import.h \
- $(PY_INCLUDE)\intobject.h $(PY_INCLUDE)\intrcheck.h $(PY_INCLUDE)\listobject.h \
- $(PY_INCLUDE)\longobject.h $(PY_INCLUDE)\methodobject.h \
- $(PY_INCLUDE)\modsupport.h $(PY_INCLUDE)\moduleobject.h $(PY_INCLUDE)\mymalloc.h \
- $(PY_INCLUDE)\myproto.h $(PY_INCLUDE)\object.h $(PY_INCLUDE)\objimpl.h \
- $(PY_INCLUDE)\pydebug.h $(PY_INCLUDE)\pyerrors.h $(PY_INCLUDE)\pyfpe.h \
- $(PY_INCLUDE)\pystate.h $(PY_INCLUDE)\python.h $(PY_INCLUDE)\pythonrun.h \
- $(PY_INCLUDE)\rangeobject.h $(PY_MODULES)\regexpr.h $(PY_INCLUDE)\sliceobject.h \
- $(PY_INCLUDE)\stringobject.h $(PY_INCLUDE)\sysmodule.h $(PY_INCLUDE)\traceback.h \
- $(PY_INCLUDE)\tupleobject.h
-
-regexpr.obj: $(PY_INCLUDE)\abstract.h $(PY_INCLUDE)\ceval.h \
- $(PY_INCLUDE)\classobject.h $(PY_INCLUDE)\cobject.h $(PY_INCLUDE)\complexobject.h \
- pyconfig.h $(PY_INCLUDE)\dictobject.h $(PY_INCLUDE)\fileobject.h \
- $(PY_INCLUDE)\floatobject.h $(PY_INCLUDE)\funcobject.h $(PY_INCLUDE)\import.h \
- $(PY_INCLUDE)\intobject.h $(PY_INCLUDE)\intrcheck.h $(PY_INCLUDE)\listobject.h \
- $(PY_INCLUDE)\longobject.h $(PY_INCLUDE)\methodobject.h \
- $(PY_INCLUDE)\modsupport.h $(PY_INCLUDE)\moduleobject.h $(PY_INCLUDE)\mymalloc.h \
- $(PY_INCLUDE)\myproto.h $(PY_INCLUDE)\object.h $(PY_INCLUDE)\objimpl.h \
- $(PY_INCLUDE)\pydebug.h $(PY_INCLUDE)\pyerrors.h $(PY_INCLUDE)\pyfpe.h \
- $(PY_INCLUDE)\pystate.h $(PY_INCLUDE)\python.h $(PY_INCLUDE)\pythonrun.h \
- $(PY_INCLUDE)\rangeobject.h $(PY_MODULES)\regexpr.h $(PY_INCLUDE)\sliceobject.h \
- $(PY_INCLUDE)\stringobject.h $(PY_INCLUDE)\sysmodule.h $(PY_INCLUDE)\traceback.h \
- $(PY_INCLUDE)\tupleobject.h
-
resource.obj: $(PY_INCLUDE)\abstract.h $(OS2TCPIP)\Include\sys\time.h $(PY_INCLUDE)\ceval.h \
$(PY_INCLUDE)\classobject.h $(PY_INCLUDE)\cobject.h $(PY_INCLUDE)\complexobject.h \
pyconfig.h $(PY_INCLUDE)\dictobject.h $(PY_INCLUDE)\fileobject.h \
pythonrun.h rangeobject.h sliceobject.h stringobject.h sysmodule.h \
traceback.h tupleobject.h
-regexmodule.obj: abstract.h ceval.h classobject.h cobject.h complexobject.h \
- pyconfig.h dictobject.h fileobject.h floatobject.h funcobject.h \
- import.h intobject.h intrcheck.h listobject.h longobject.h \
- methodobject.h modsupport.h moduleobject.h mymalloc.h myproto.h \
- object.h objimpl.h pydebug.h pyerrors.h pyfpe.h pystate.h python.h \
- pythonrun.h rangeobject.h regexpr.h sliceobject.h stringobject.h \
- sysmodule.h traceback.h tupleobject.h
-
-regexpr.obj: abstract.h ceval.h classobject.h cobject.h \
- complexobject.h pyconfig.h dictobject.h fileobject.h floatobject.h \
- funcobject.h import.h intobject.h intrcheck.h listobject.h \
- longobject.h methodobject.h modsupport.h moduleobject.h mymalloc.h \
- myproto.h object.h objimpl.h pydebug.h pyerrors.h pyfpe.h \
- pystate.h python.h pythonrun.h rangeobject.h regexpr.h \
- sliceobject.h stringobject.h sysmodule.h traceback.h tupleobject.h
-
-reopmodule.obj: abstract.h ceval.h classobject.h cobject.h complexobject.h \
- pyconfig.h dictobject.h fileobject.h floatobject.h funcobject.h \
- import.h intobject.h intrcheck.h listobject.h longobject.h \
- methodobject.h modsupport.h moduleobject.h mymalloc.h myproto.h \
- object.h objimpl.h pydebug.h pyerrors.h pyfpe.h pystate.h python.h \
- pythonrun.h rangeobject.h regexpr.h sliceobject.h stringobject.h \
- sysmodule.h traceback.h tupleobject.h
-
resource.obj: abstract.h c:\mptn\include\sys\time.h ceval.h classobject.h \
cobject.h complexobject.h pyconfig.h dictobject.h fileobject.h \
floatobject.h funcobject.h import.h intobject.h intrcheck.h \
# change this module too.
try:
- import string
+ import os
except:
- print """Could not import the standard "string" module.
+ print """Could not import the standard "os" module.
Please check your PYTHONPATH environment variable."""
sys.exit(1)
try:
- import regex_syntax
+ import symbol
except:
- print """Could not import the standard "regex_syntax" module. If this is
+ print """Could not import the standard "symbol" module. If this is
a PC, you should add the dos_8x3 directory to your PYTHONPATH."""
sys.exit(1)
import os
for dir in sys.path:
- file = os.path.join(dir, "string.py")
+ file = os.path.join(dir, "os.py")
if os.path.isfile(file):
test = os.path.join(dir, "test")
if os.path.isdir(test):
<File
RelativePath="..\Objects\rangeobject.c">
</File>
- <File
- RelativePath="..\Modules\regexmodule.c">
- </File>
- <File
- RelativePath="..\Modules\regexpr.c">
- </File>
<File
RelativePath="..\Modules\rgbimgmodule.c">
</File>
@.^.Lib.md5/pyd\
@.^.Lib.operator/pyd\
@.^.Lib.parser/pyd\
- @.^.Lib.regex/pyd\
@.^.Lib.rgbimg/pyd\
@.^.Lib.sha/pyd\
@.^.Lib.signal/pyd\
@.^.Lib.parser/pyd: @.^.Modules.o.parsermodule s.linktab
$(MAKEDLK) -d @.^.Lib.parser/pyd -s s.linktab -o @.^.Modules.o.parsermodule -e initparser
-@.^.Lib.regex/pyd: @.^.Modules.o.regexmodule @.^.Modules.o.regexpr s.linktab
- $(LINK) -aof -o @.^.Modules.o.regexlink @.^.Modules.o.regexmodule @.^.Modules.o.regexpr
- $(MAKEDLK) -d @.^.Lib.regex/pyd -s s.linktab -o @.^.Modules.o.regexlink -e initregex
-
@.^.Lib.rgbimg/pyd: @.^.Modules.o.rgbimgmodule s.linktab
$(MAKEDLK) -d @.^.Lib.rgbimg/pyd -s s.linktab -o @.^.Modules.o.rgbimgmodule -e initrgbimg
# into a program for a different change to Python programs...
import sys
-import regex
+import re
import os
from stat import *
if fix(arg): bad = 1
sys.exit(bad)
-ispythonprog = regex.compile('^[a-zA-Z0-9_]+\.py$')
+ispythonprog = re.compile('^[a-zA-Z0-9_]+\.py$')
def ispython(name):
return ispythonprog.match(name) >= 0
# This expression doesn't catch *all* class definition headers,
# but it's pretty darn close.
-classexpr = '^\([ \t]*class +[a-zA-Z0-9_]+\) *( *) *\(\(=.*\)?\):'
-classprog = regex.compile(classexpr)
+classexpr = '^([ \t]*class +[a-zA-Z0-9_]+) *( *) *((=.*)?):'
+classprog = re.compile(classexpr)
# Expressions for finding base class expressions.
-baseexpr = '^ *\(.*\) *( *) *$'
-baseprog = regex.compile(baseexpr)
+baseexpr = '^ *(.*) *( *) *$'
+baseprog = re.compile(baseexpr)
def fixline(line):
if classprog.match(line) < 0: # No 'class' keyword -- no change
# files.
import sys
-import regex
+import re
import os
from stat import *
import getopt
# Change this regular expression to select a different set of files
Wanted = '^[a-zA-Z0-9_]+\.[ch]$'
def wanted(name):
- return regex.match(Wanted, name) >= 0
+ return re.match(Wanted, name) >= 0
def recursedown(dirname):
dbg('recursedown(%r)\n' % (dirname,))
# Anything else is an operator -- don't list this explicitly because of '/*'
OutsideComment = (Identifier, Number, String, Char, CommentStart)
-OutsideCommentPattern = '\(' + '\|'.join(OutsideComment) + '\)'
-OutsideCommentProgram = regex.compile(OutsideCommentPattern)
+OutsideCommentPattern = '(' + '|'.join(OutsideComment) + ')'
+OutsideCommentProgram = re.compile(OutsideCommentPattern)
InsideComment = (Identifier, Number, CommentEnd)
-InsideCommentPattern = '\(' + '\|'.join(InsideComment) + '\)'
-InsideCommentProgram = regex.compile(InsideCommentPattern)
+InsideCommentPattern = '(' + '|'.join(InsideComment) + ')'
+InsideCommentProgram = re.compile(InsideCommentPattern)
def initfixline():
global Program
# preprocessor commands.
import sys
-import regex
import getopt
defs = []
# into a program for a different change to Python programs...
import sys
-import regex
+import re
import os
from stat import *
if fix(arg): bad = 1
sys.exit(bad)
-ispythonprog = regex.compile('^[a-zA-Z0-9_]+\.py$')
+ispythonprog = re.compile('^[a-zA-Z0-9_]+\.py$')
def ispython(name):
return ispythonprog.match(name) >= 0
if lineno == 1 and g is None and line[:2] == '#!':
# Check for non-Python scripts
words = line[2:].split()
- if words and regex.search('[pP]ython', words[0]) < 0:
+ if words and re.search('[pP]ython', words[0]) < 0:
msg = filename + ': ' + words[0]
msg = msg + ' script; not fixed\n'
err(msg)
return 0
-fixpat = '^[ \t]+def +[a-zA-Z0-9_]+ *( *self *, *\(( *\(.*\) *)\) *) *:'
-fixprog = regex.compile(fixpat)
+fixpat = '^[ \t]+def +[a-zA-Z0-9_]+ *( *self *, *(( *(.*) *)) *) *:'
+fixprog = re.compile(fixpat)
def fixline(line):
if fixprog.match(line) >= 0:
import sys
import os
import getopt
-import regex
+import re
# Types of symbols.
#
# Regular expression to parse "nm -o" output.
#
-matcher = regex.compile('\(.*\):\t?........ \(.\) \(.*\)$')
+matcher = re.compile('(.*):\t?........ (.) (.*)$')
# Store "item" in "dict" under "key".
# The dictionary maps keys to lists of items.
# into a program for a different change to Python programs...
import sys
-import regex
+import re
import os
from stat import *
import getopt
if fix(arg): bad = 1
sys.exit(bad)
-ispythonprog = regex.compile('^[a-zA-Z0-9_]+\.py$')
+ispythonprog = re.compile('^[a-zA-Z0-9_]+\.py$')
def ispython(name):
return ispythonprog.match(name) >= 0
import sys
-import regex
+import re
import os
# Compiled regular expressions to search for import statements
#
-m_import = regex.compile('^[ \t]*from[ \t]+\([^ \t]+\)[ \t]+')
-m_from = regex.compile('^[ \t]*import[ \t]+\([^#]+\)')
+m_import = re.compile('^[ \t]*from[ \t]+([^ \t]+)[ \t]+')
+m_from = re.compile('^[ \t]*import[ \t]+([^#]+)')
# Collect data from one file
#
# Some modules that are normally always on:
- exts.append( Extension('regex', ['regexmodule.c', 'regexpr.c']) )
-
exts.append( Extension('_weakref', ['_weakref.c']) )
# array objects