From: Daniel Dunbar <daniel@zuster.org>
Date: Sat, 1 Aug 2009 03:22:27 +0000 (+0000)
Subject: MultiTestRunner: Add 'sh' parsing to ShUtil.
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=93fe03fb77dc4bb660775ef3447584182f60f018;p=clang

MultiTestRunner: Add 'sh' parsing to ShUtil.


git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@77765 91177308-0d34-0410-b5e6-96231b3b80d8
---

diff --git a/utils/test/ShUtil.py b/utils/test/ShUtil.py
index b8485b1596..3878f5961d 100644
--- a/utils/test/ShUtil.py
+++ b/utils/test/ShUtil.py
@@ -1,3 +1,5 @@
+import itertools
+
 import Util
 
 class ShLexer:
@@ -23,15 +25,42 @@ class ShLexer:
             return True
         return False
 
-    def lex_arg(self, c):
+    def lex_arg_fast(self, c):
+        # Get the leading whitespace-free chunk (text up to the first whitespace).
+        chunk = self.data[self.pos - 1:].split(None, 1)[0]
+        
+        # If it has special characters, the fast path failed.
+        if ('|' in chunk or '&' in chunk or 
+            '<' in chunk or '>' in chunk or
+            "'" in chunk or '"' in chunk):
+            return None
+        
+        self.pos = self.pos - 1 + len(chunk)
+        return chunk
+        
+    def lex_arg_slow(self, c):
         if c in "'\"":
             str = self.lex_arg_quoted(c)
         else:
             str = c
         while self.pos != self.end:
             c = self.look()
-            if c.isspace() or c in "|><&":
+            if c.isspace() or c in "|&":
                 break
+            elif c in '><':
+                # This is an annoying case; we treat '2>' as a single token so
+                # we don't have to track whitespace tokens.
+
+                # If the string lexed so far isn't an integer, do the usual thing.
+                if not str.isdigit():
+                    break
+
+                # Otherwise, lex the operator and convert to a redirection
+                # token.
+                num = int(str)
+                tok = self.lex_one_token()
+                assert isinstance(tok, tuple) and len(tok) == 1
+                return (tok[0], num)                    
             elif c == '"':
                 self.eat()
                 str += self.lex_arg_quoted('"')
@@ -60,14 +89,31 @@ class ShLexer:
                 str += c
         Util.warning("missing quote character in %r" % self.data)
         return str
+    
+    def lex_arg_checked(self, c):
+        pos = self.pos
+        res = self.lex_arg_fast(c)
+        end = self.pos
 
+        self.pos = pos
+        reference = self.lex_arg_slow(c)
+        if res is not None:
+            if res != reference:
+                raise ValueError,"Fast path failure: %r != %r" % (res, reference)
+            if self.pos != end:
+                raise ValueError,"Fast path failure: %r != %r" % (self.pos, end)
+        return reference
+        
+    def lex_arg(self, c):
+        return self.lex_arg_fast(c) or self.lex_arg_slow(c)
+        
     def lex_one_token(self):
         """
         lex_one_token - Lex a single 'sh' token. """
 
         c = self.eat()
-        if c == ';':
-            return (c)
+        if c in ';!':
+            return (c,)
         if c == '|':
             if self.maybe_eat('|'):
                 return ('||',)
@@ -89,6 +135,7 @@ class ShLexer:
                 return ('<&',)
             if self.maybe_eat('>'):
                 return ('<<',)
+
         return self.lex_arg(c)
 
     def lex(self):
@@ -100,19 +147,151 @@ class ShLexer:
 
 ###
 
+class Command:
+    def __init__(self, args, redirects):
+        self.args = list(args)
+        self.redirects = list(redirects)
+        
+    def __repr__(self):
+        return 'Command(%r, %r)' % (self.args, self.redirects)
+
+    def __cmp__(self, other):
+        if not isinstance(other, Command):
+            return -1
+
+        return cmp((self.args, self.redirects),
+                   (other.args, other.redirects))
+
+class Pipeline:
+    def __init__(self, commands, negate):
+        self.commands = commands
+        self.negate = negate
+
+    def __repr__(self):
+        return 'Pipeline(%r, %r)' % (self.commands, self.negate)
+
+    def __cmp__(self, other):
+        if not isinstance(other, Pipeline):
+            return -1
+
+        return cmp((self.commands, self.negate), 
+                   (other.commands, other.negate))
+
+class Seq:
+    def __init__(self, lhs, op, rhs):
+        assert op in (';', '&', '||', '&&')
+        self.op = op
+        self.lhs = lhs
+        self.rhs = rhs
+    
+    def __repr__(self):
+        return 'Seq(%r, %r, %r)' % (self.lhs, self.op, self.rhs)
+
+    def __cmp__(self, other):
+        if not isinstance(other, Seq):
+            return -1
+
+        return cmp((self.lhs, self.op, self.rhs), 
+                   (other.lhs, other.op, other.rhs))
+
+class ShParser:
+    def __init__(self, data):
+        self.data = data
+        self.tokens = ShLexer(data).lex()
+    
+    def lex(self):
+        try:
+            return self.tokens.next()
+        except StopIteration:
+            return None
+    
+    def look(self):
+        next = self.lex()
+        if next:
+            self.tokens = itertools.chain([next], self.tokens)
+        return next
+    
+    def parse_command(self):
+        tok = self.lex()
+        if not tok:
+            raise ValueError,"empty command!"
+        if isinstance(tok, tuple):
+            raise ValueError,"syntax error near unexpected token %r" % tok[0]
+        
+        args = [tok]
+        redirects = []
+        while 1:
+            tok = self.look()
+
+            # EOF?
+            if tok is None:
+                break
+
+            # If this is an argument, just add it to the current command.
+            if isinstance(tok, str):
+                args.append(self.lex())
+                continue
+
+            # Otherwise see if it is a terminator.
+            assert isinstance(tok, tuple)
+            if tok[0] in ('|',';','&','||','&&'):
+                break
+            
+            # Otherwise it must be a redirection.
+            op = self.lex()
+            arg = self.lex()
+            if not arg:
+                raise ValueError,"syntax error near token %r" % op[0]
+            redirects.append((op, arg))
+
+        return Command(args, redirects)
+
+    def parse_pipeline(self):
+        negate = False
+        if self.look() == ('!',):
+            self.lex()
+            negate = True
+
+        commands = [self.parse_command()]
+        while self.look() == ('|',):
+            self.lex()
+            commands.append(self.parse_command())
+        return Pipeline(commands, negate)
+            
+    def parse(self):
+        lhs = self.parse_pipeline()
+
+        while self.look():
+            operator = self.lex()
+            assert isinstance(operator, tuple) and len(operator) == 1
+
+            if not self.look():
+                raise ValueError, "missing argument to operator %r" % operator[0]
+            
+            # FIXME: Operator precedence!! All operators currently fold left-to-right.
+            lhs = Seq(lhs, operator[0], self.parse_pipeline())
+
+        return lhs
+
+###
+
 import unittest
 
 class TestShLexer(unittest.TestCase):
     def lex(self, str):
         return list(ShLexer(str).lex())
 
-    def testops(self):
+    def test_basic(self):
+        self.assertEqual(self.lex('a|b>c&d'),
+                         ['a', ('|',), 'b', ('>',), 'c', ('&',), 'd'])
+
+    def test_redirection_tokens(self):
         self.assertEqual(self.lex('a2>c'),
                          ['a2', ('>',), 'c'])
         self.assertEqual(self.lex('a 2>c'),
-                         ['a', '2', ('>',), 'c'])
+                         ['a', ('>',2), 'c'])
         
-    def testquoting(self):
+    def test_quoting(self):
         self.assertEqual(self.lex(""" 'a' """),
                          ['a'])
         self.assertEqual(self.lex(""" "hello\\"world" """),
@@ -122,5 +301,65 @@ class TestShLexer(unittest.TestCase):
         self.assertEqual(self.lex(""" he"llo wo"rld """),
                          ["hello world"])
 
+class TestShParse(unittest.TestCase):
+    def parse(self, str):
+        return ShParser(str).parse()
+
+    def test_basic(self):
+        self.assertEqual(self.parse('echo hello'),
+                         Pipeline([Command(['echo', 'hello'], [])], False))
+
+    def test_redirection(self):
+        self.assertEqual(self.parse('echo hello > c'),
+                         Pipeline([Command(['echo', 'hello'], 
+                                           [((('>'),), 'c')])], False))
+        self.assertEqual(self.parse('echo hello > c >> d'),
+                         Pipeline([Command(['echo', 'hello'], [(('>',), 'c'),
+                                                     (('>>',), 'd')])], False))
+
+    def test_pipeline(self):
+        self.assertEqual(self.parse('a | b'),
+                         Pipeline([Command(['a'], []),
+                                   Command(['b'], [])],
+                                  False))
+
+        self.assertEqual(self.parse('a | b | c'),
+                         Pipeline([Command(['a'], []),
+                                   Command(['b'], []),
+                                   Command(['c'], [])],
+                                  False))
+
+        self.assertEqual(self.parse('! a'),
+                         Pipeline([Command(['a'], [])],
+                                  True))
+
+    def test_list(self):        
+        self.assertEqual(self.parse('a ; b'),
+                         Seq(Pipeline([Command(['a'], [])], False),
+                             ';',
+                             Pipeline([Command(['b'], [])], False)))
+
+        self.assertEqual(self.parse('a & b'),
+                         Seq(Pipeline([Command(['a'], [])], False),
+                             '&',
+                             Pipeline([Command(['b'], [])], False)))
+
+        self.assertEqual(self.parse('a && b'),
+                         Seq(Pipeline([Command(['a'], [])], False),
+                             '&&',
+                             Pipeline([Command(['b'], [])], False)))
+
+        self.assertEqual(self.parse('a || b'),
+                         Seq(Pipeline([Command(['a'], [])], False),
+                             '||',
+                             Pipeline([Command(['b'], [])], False)))
+
+        self.assertEqual(self.parse('a && b || c'),
+                         Seq(Seq(Pipeline([Command(['a'], [])], False),
+                                 '&&',
+                                 Pipeline([Command(['b'], [])], False)),
+                             '||',
+                             Pipeline([Command(['c'], [])], False)))
+        
 if __name__ == '__main__':
     unittest.main()