# Needed by 'split_quoted()'.  Each pattern is applied with '.match()' at an
# explicit offset inside the string being split.
_wordchars_re = re.compile(r'[^\\\'\"\s]*')     # run of ordinary characters
_squote_re = re.compile(r"'(?:[^'\\]|\\.)*'")   # one complete '...' string
_dquote_re = re.compile(r'"(?:[^"\\]|\\.)*"')   # one complete "..." string


def split_quoted (s):
    """Split a string up according to Unix shell-like rules for quotes and
    backslashes.  In short: words are delimited by whitespace, as long as
    that whitespace is not escaped by a backslash, or inside a quoted
    string.  Single and double quotes are equivalent, and the quote
    characters can be backslash-escaped.  Outside of quotes, the backslash
    is stripped from any two-character escape sequence, leaving only the
    escaped character.  The quote characters are stripped from any quoted
    string; backslashes *inside* a quoted string are left untouched.

    's' is the string to split; leading and trailing whitespace is
    ignored.  Returns a list of words (empty if 's' is empty or all
    whitespace).  An empty quoted string ('' or "") yields an empty word.

    Raises ValueError if a quoted string is never closed.
    """

    # This is a nice algorithm for splitting up a single string, since it
    # doesn't require character-by-character examination.  It was a little
    # bit of a brain-bender to get it working right, though...
    #
    # Invariant: 's' always starts with the word currently being built;
    # s[:pos] is the part of that word that has already been processed
    # (quotes/backslashes removed), and scanning resumes at 'pos'.

    s = s.strip()
    words = []
    pos = 0

    while s:
        # Skip over ordinary characters; the pattern can match the empty
        # string, so 'm' is never None here.
        m = _wordchars_re.match(s, pos)
        end = m.end()
        if end == len(s):
            words.append(s[:end])
            break

        ch = s[end]
        if ch.isspace():                # unescaped, unquoted whitespace:
            words.append(s[:end])       # we definitely have a word delimiter
            s = s[end:].lstrip()
            pos = 0

        elif ch == '\\':                # preserve whatever is being escaped;
                                        # will become part of the current word
            s = s[:end] + s[end+1:]     # drop the backslash itself...
            pos = end + 1               # ...and step over the escaped char

        else:
            if ch == "'":               # slurp singly-quoted string
                m = _squote_re.match(s, end)
            elif ch == '"':             # slurp doubly-quoted string
                m = _dquote_re.match(s, end)
            else:
                raise RuntimeError(
                    "this can't happen (bad char '%c')" % ch)

            if m is None:
                raise ValueError(
                    "bad string (mismatched %s quotes?)" % ch)

            (beg, end) = m.span()
            # Remove only the two enclosing quote characters; the quoted
            # text stays in place as part of the current word.
            s = s[:beg] + s[beg+1:end-1] + s[end:]
            pos = m.end() - 2           # two characters were just removed

        # Everything left has been consumed by an escape or a quoted
        # string that ran to the end of the input: that is the last word.
        if pos >= len(s):
            words.append(s)
            break

    return words

# split_quoted ()