From: Daniel Dunbar Date: Sat, 1 Aug 2009 03:22:27 +0000 (+0000) Subject: MultiTestRunner: Add 'sh' parsing to ShUtil. X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=93fe03fb77dc4bb660775ef3447584182f60f018;p=clang MultiTestRunner: Add 'sh' parsing to ShUtil. git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@77765 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/utils/test/ShUtil.py b/utils/test/ShUtil.py index b8485b1596..3878f5961d 100644 --- a/utils/test/ShUtil.py +++ b/utils/test/ShUtil.py @@ -1,3 +1,5 @@ +import itertools + import Util class ShLexer: @@ -23,15 +25,42 @@ class ShLexer: return True return False - def lex_arg(self, c): + def lex_arg_fast(self, c): + # Get the leading whitespace free section. + chunk = self.data[self.pos - 1:].split(None, 1)[0] + + # If it has special characters, the fast path failed. + if ('|' in chunk or '&' in chunk or + '<' in chunk or '>' in chunk or + "'" in chunk or '"' in chunk): + return None + + self.pos = self.pos - 1 + len(chunk) + return chunk + + def lex_arg_slow(self, c): if c in "'\"": str = self.lex_arg_quoted(c) else: str = c while self.pos != self.end: c = self.look() - if c.isspace() or c in "|><&": + if c.isspace() or c in "|&": break + elif c in '><': + # This is an annoying case; we treat '2>' as a single token so + # we don't have to track whitespace tokens. + + # If the parse string isn't an integer, do the usual thing. + if not str.isdigit(): + break + + # Otherwise, lex the operator and convert to a redirection + # token. + num = int(str) + tok = self.lex_one_token() + assert isinstance(tok, tuple) and len(tok) == 1 + return (tok[0], num) elif c == '"': self.eat() str += self.lex_arg_quoted('"') @@ -60,14 +89,31 @@ class ShLexer: str += c Util.warning("missing quote character in %r" % self.data) return str + + def lex_arg_checked(self, c): + pos = self.pos + res = self.lex_arg_fast(c) + end = self.pos + self.pos = pos + reference = self.lex_arg_slow(c) + if res is not None: + if res != reference: + raise ValueError,"Fast path failure: %r != %r" % (res, reference) + if self.pos != end: + raise ValueError,"Fast path failure: %r != %r" % (self.pos, end) + return reference + + def lex_arg(self, c): + return self.lex_arg_fast(c) or self.lex_arg_slow(c) + def lex_one_token(self): """ lex_one_token - Lex a single 'sh' token. """ c = self.eat() - if c == ';': - return (c) + if c in ';!': + return (c,) if c == '|': if self.maybe_eat('|'): return ('||',) @@ -89,6 +135,7 @@ class ShLexer: return ('<&',) if self.maybe_eat('>'): return ('<<',) + return self.lex_arg(c) def lex(self): @@ -100,19 +147,151 @@ class ShLexer: ### +class Command: + def __init__(self, args, redirects): + self.args = list(args) + self.redirects = list(redirects) + + def __repr__(self): + return 'Command(%r, %r)' % (self.args, self.redirects) + + def __cmp__(self, other): + if not isinstance(other, Command): + return -1 + + return cmp((self.args, self.redirects), + (other.args, other.redirects)) + +class Pipeline: + def __init__(self, commands, negate): + self.commands = commands + self.negate = negate + + def __repr__(self): + return 'Pipeline(%r, %r)' % (self.commands, self.negate) + + def __cmp__(self, other): + if not isinstance(other, Pipeline): + return -1 + + return cmp((self.commands, self.negate), + (other.commands, other.negate)) + +class Seq: + def __init__(self, lhs, op, rhs): + assert op in (';', '&', '||', '&&') + self.op = op + self.lhs = lhs + self.rhs = rhs + + def __repr__(self): + return 'Seq(%r, %r, %r)' % (self.lhs, self.op, self.rhs) + + def __cmp__(self, other): + if not isinstance(other, Seq): + return -1 + + return cmp((self.lhs, self.op, self.rhs), + (other.lhs, other.op, other.rhs)) + +class ShParser: + def __init__(self, data): + self.data = data + self.tokens = ShLexer(data).lex() + + def lex(self): + try: + return self.tokens.next() + except StopIteration: + return None + + def look(self): + next = self.lex() + if next: + self.tokens = itertools.chain([next], self.tokens) + return next + + def parse_command(self): + tok = self.lex() + if not tok: + raise ValueError,"empty command!" + if isinstance(tok, tuple): + raise ValueError,"syntax error near unexpected token %r" % tok[0] + + args = [tok] + redirects = [] + while 1: + tok = self.look() + + # EOF? + if tok is None: + break + + # If this is an argument, just add it to the current command. + if isinstance(tok, str): + args.append(self.lex()) + continue + + # Otherwise see if it is a terminator. + assert isinstance(tok, tuple) + if tok[0] in ('|',';','&','||','&&'): + break + + # Otherwise it must be a redirection. + op = self.lex() + arg = self.lex() + if not arg: + raise ValueError,"syntax error near token %r" % op[0] + redirects.append((op, arg)) + + return Command(args, redirects) + + def parse_pipeline(self): + negate = False + if self.look() == ('!',): + self.lex() + negate = True + + commands = [self.parse_command()] + while self.look() == ('|',): + self.lex() + commands.append(self.parse_command()) + return Pipeline(commands, negate) + + def parse(self): + lhs = self.parse_pipeline() + + while self.look(): + operator = self.lex() + assert isinstance(operator, tuple) and len(operator) == 1 + + if not self.look(): + raise ValueError, "missing argument to operator %r" % operator[0] + + # FIXME: Operator precedence!! + lhs = Seq(lhs, operator[0], self.parse_pipeline()) + + return lhs + +### + import unittest class TestShLexer(unittest.TestCase): def lex(self, str): return list(ShLexer(str).lex()) - def testops(self): + def test_basic(self): + self.assertEqual(self.lex('a|b>c&d'), + ['a', ('|',), 'b', ('>',), 'c', ('&',), 'd']) + + def test_redirection_tokens(self): self.assertEqual(self.lex('a2>c'), ['a2', ('>',), 'c']) self.assertEqual(self.lex('a 2>c'), - ['a', '2', ('>',), 'c']) + ['a', ('>',2), 'c']) - def testquoting(self): + def test_quoting(self): self.assertEqual(self.lex(""" 'a' """), ['a']) self.assertEqual(self.lex(""" "hello\\"world" """), @@ -122,5 +301,65 @@ class TestShLexer(unittest.TestCase): self.assertEqual(self.lex(""" he"llo wo"rld """), ["hello world"]) +class TestShParse(unittest.TestCase): + def parse(self, str): + return ShParser(str).parse() + + def test_basic(self): + self.assertEqual(self.parse('echo hello'), + Pipeline([Command(['echo', 'hello'], [])], False)) + + def test_redirection(self): + self.assertEqual(self.parse('echo hello > c'), + Pipeline([Command(['echo', 'hello'], + [((('>'),), 'c')])], False)) + self.assertEqual(self.parse('echo hello > c >> d'), + Pipeline([Command(['echo', 'hello'], [(('>',), 'c'), + (('>>',), 'd')])], False)) + + def test_pipeline(self): + self.assertEqual(self.parse('a | b'), + Pipeline([Command(['a'], []), + Command(['b'], [])], + False)) + + self.assertEqual(self.parse('a | b | c'), + Pipeline([Command(['a'], []), + Command(['b'], []), + Command(['c'], [])], + False)) + + self.assertEqual(self.parse('! a'), + Pipeline([Command(['a'], [])], + True)) + + def test_list(self): + self.assertEqual(self.parse('a ; b'), + Seq(Pipeline([Command(['a'], [])], False), + ';', + Pipeline([Command(['b'], [])], False))) + + self.assertEqual(self.parse('a & b'), + Seq(Pipeline([Command(['a'], [])], False), + '&', + Pipeline([Command(['b'], [])], False))) + + self.assertEqual(self.parse('a && b'), + Seq(Pipeline([Command(['a'], [])], False), + '&&', + Pipeline([Command(['b'], [])], False))) + + self.assertEqual(self.parse('a || b'), + Seq(Pipeline([Command(['a'], [])], False), + '||', + Pipeline([Command(['b'], [])], False))) + + self.assertEqual(self.parse('a && b || c'), + Seq(Seq(Pipeline([Command(['a'], [])], False), + '&&', + Pipeline([Command(['b'], [])], False)), + '||', + Pipeline([Command(['c'], [])], False))) + if __name__ == '__main__': unittest.main()