granicus.if.org Git - python/commitdiff
bpo-36876: Re-organize the c-analyzer tool code. (gh-16841)
author Eric Snow <ericsnowcurrently@gmail.com>
Sat, 19 Oct 2019 02:00:04 +0000 (19:00 -0700)
committer GitHub <noreply@github.com>
Sat, 19 Oct 2019 02:00:04 +0000 (19:00 -0700)
This is partly a cleanup of the code. It also is preparation for getting the variables from the source (cross-platform) rather than from the symbols.

The change only touches the tool (and its tests).

56 files changed:
Lib/test/test_check_c_globals.py
Lib/test/test_tools/test_c_analyzer/test_c_analyzer_common/test_known.py [deleted file]
Lib/test/test_tools/test_c_analyzer/test_c_globals/test_find.py [deleted file]
Lib/test/test_tools/test_c_analyzer/test_common/__init__.py [moved from Lib/test/test_tools/test_c_analyzer/test_c_analyzer_common/__init__.py with 100% similarity]
Lib/test/test_tools/test_c_analyzer/test_common/test_files.py [moved from Lib/test/test_tools/test_c_analyzer/test_c_analyzer_common/test_files.py with 99% similarity]
Lib/test/test_tools/test_c_analyzer/test_common/test_info.py [moved from Lib/test/test_tools/test_c_analyzer/test_c_analyzer_common/test_info.py with 98% similarity]
Lib/test/test_tools/test_c_analyzer/test_common/test_show.py [moved from Lib/test/test_tools/test_c_analyzer/test_c_globals/test_show.py with 94% similarity]
Lib/test/test_tools/test_c_analyzer/test_cpython/__init__.py [moved from Lib/test/test_tools/test_c_analyzer/test_c_globals/__init__.py with 100% similarity]
Lib/test/test_tools/test_c_analyzer/test_cpython/test___main__.py [moved from Lib/test/test_tools/test_c_analyzer/test_c_globals/test___main__.py with 79% similarity]
Lib/test/test_tools/test_c_analyzer/test_cpython/test_functional.py [moved from Lib/test/test_tools/test_c_analyzer/test_c_globals/test_functional.py with 100% similarity]
Lib/test/test_tools/test_c_analyzer/test_cpython/test_supported.py [moved from Lib/test/test_tools/test_c_analyzer/test_c_globals/test_supported.py with 85% similarity]
Lib/test/test_tools/test_c_analyzer/test_parser/__init__.py [moved from Lib/test/test_tools/test_c_analyzer/test_c_parser/__init__.py with 100% similarity]
Lib/test/test_tools/test_c_analyzer/test_parser/test_declarations.py [moved from Lib/test/test_tools/test_c_analyzer/test_c_parser/test_declarations.py with 99% similarity]
Lib/test/test_tools/test_c_analyzer/test_parser/test_preprocessor.py [moved from Lib/test/test_tools/test_c_analyzer/test_c_parser/test_preprocessor.py with 99% similarity]
Lib/test/test_tools/test_c_analyzer/test_symbols/__init__.py [moved from Lib/test/test_tools/test_c_analyzer/test_c_symbols/__init__.py with 100% similarity]
Lib/test/test_tools/test_c_analyzer/test_symbols/test_info.py [moved from Lib/test/test_tools/test_c_analyzer/test_c_symbols/test_info.py with 98% similarity]
Lib/test/test_tools/test_c_analyzer/test_variables/__init__.py [new file with mode: 0644]
Lib/test/test_tools/test_c_analyzer/test_variables/test_find.py [new file with mode: 0644]
Lib/test/test_tools/test_c_analyzer/test_variables/test_info.py [moved from Lib/test/test_tools/test_c_analyzer/test_c_parser/test_info.py with 98% similarity]
Lib/test/test_tools/test_c_analyzer/test_variables/test_known.py [new file with mode: 0644]
Tools/c-analyzer/c-globals.py
Tools/c-analyzer/c_analyzer/__init__.py [moved from Tools/c-analyzer/c_globals/__init__.py with 100% similarity]
Tools/c-analyzer/c_analyzer/common/__init__.py [moved from Tools/c-analyzer/c_parser/__init__.py with 100% similarity]
Tools/c-analyzer/c_analyzer/common/files.py [moved from Tools/c-analyzer/c_analyzer_common/files.py with 82% similarity]
Tools/c-analyzer/c_analyzer/common/info.py [new file with mode: 0644]
Tools/c-analyzer/c_analyzer/common/show.py [new file with mode: 0644]
Tools/c-analyzer/c_analyzer/common/util.py [moved from Tools/c-analyzer/c_analyzer_common/util.py with 100% similarity]
Tools/c-analyzer/c_analyzer/parser/__init__.py [moved from Tools/c-analyzer/c_symbols/__init__.py with 100% similarity]
Tools/c-analyzer/c_analyzer/parser/declarations.py [moved from Tools/c-analyzer/c_parser/declarations.py with 83% similarity]
Tools/c-analyzer/c_analyzer/parser/find.py [new file with mode: 0644]
Tools/c-analyzer/c_analyzer/parser/naive.py [moved from Tools/c-analyzer/c_parser/naive.py with 80% similarity]
Tools/c-analyzer/c_analyzer/parser/preprocessor.py [moved from Tools/c-analyzer/c_parser/preprocessor.py with 99% similarity]
Tools/c-analyzer/c_analyzer/parser/source.py [moved from Tools/c-analyzer/c_parser/source.py with 100% similarity]
Tools/c-analyzer/c_analyzer/symbols/__init__.py [new file with mode: 0644]
Tools/c-analyzer/c_analyzer/symbols/_nm.py [moved from Tools/c-analyzer/c_symbols/binary.py with 50% similarity]
Tools/c-analyzer/c_analyzer/symbols/find.py [new file with mode: 0644]
Tools/c-analyzer/c_analyzer/symbols/info.py [moved from Tools/c-analyzer/c_symbols/info.py with 93% similarity]
Tools/c-analyzer/c_analyzer/variables/__init__.py [new file with mode: 0644]
Tools/c-analyzer/c_analyzer/variables/find.py [new file with mode: 0644]
Tools/c-analyzer/c_analyzer/variables/info.py [moved from Tools/c-analyzer/c_parser/info.py with 61% similarity]
Tools/c-analyzer/c_analyzer/variables/known.py [new file with mode: 0644]
Tools/c-analyzer/c_analyzer_common/__init__.py [deleted file]
Tools/c-analyzer/c_analyzer_common/info.py [deleted file]
Tools/c-analyzer/c_analyzer_common/known.py [deleted file]
Tools/c-analyzer/c_globals/find.py [deleted file]
Tools/c-analyzer/c_globals/show.py [deleted file]
Tools/c-analyzer/c_symbols/resolve.py [deleted file]
Tools/c-analyzer/c_symbols/source.py [deleted file]
Tools/c-analyzer/cpython/README [moved from Tools/c-analyzer/c_globals/README with 100% similarity]
Tools/c-analyzer/cpython/__init__.py [new file with mode: 0644]
Tools/c-analyzer/cpython/__main__.py [moved from Tools/c-analyzer/c_globals/__main__.py with 68% similarity]
Tools/c-analyzer/cpython/_generate.py [moved from Tools/c-analyzer/c_analyzer_common/_generate.py with 97% similarity]
Tools/c-analyzer/cpython/files.py [new file with mode: 0644]
Tools/c-analyzer/cpython/find.py [new file with mode: 0644]
Tools/c-analyzer/cpython/known.py [new file with mode: 0644]
Tools/c-analyzer/cpython/supported.py [moved from Tools/c-analyzer/c_globals/supported.py with 97% similarity]

index a3925f0ca887eaff017122d6ba05a97d8973be25..030debc452e40924c3640d218f4e6121f116e3f0 100644 (file)
@@ -3,7 +3,7 @@ import test.test_tools
 
 test.test_tools.skip_if_missing('c-analyzer')
 with test.test_tools.imports_under_tool('c-analyzer'):
-    from c_globals.__main__ import main
+    from cpython.__main__ import main
 
 
 class ActualChecks(unittest.TestCase):
diff --git a/Lib/test/test_tools/test_c_analyzer/test_c_analyzer_common/test_known.py b/Lib/test/test_tools/test_c_analyzer/test_c_analyzer_common/test_known.py
deleted file mode 100644 (file)
index 215023d..0000000
+++ /dev/null
@@ -1,68 +0,0 @@
-import re
-import textwrap
-import unittest
-
-from .. import tool_imports_for_tests
-with tool_imports_for_tests():
-    from c_parser.info import Variable
-    from c_analyzer_common.info import ID
-    from c_analyzer_common.known import from_file
-
-
-class FromFileTests(unittest.TestCase):
-
-    maxDiff = None
-
-    _return_read_tsv = ()
-
-    @property
-    def calls(self):
-        try:
-            return self._calls
-        except AttributeError:
-            self._calls = []
-            return self._calls
-
-    def _read_tsv(self, *args):
-        self.calls.append(('_read_tsv', args))
-        return self._return_read_tsv
-
-    def test_typical(self):
-        lines = textwrap.dedent('''
-            filename    funcname        name    kind    declaration
-            file1.c     -       var1    variable        static int
-            file1.c     func1   local1  variable        static int
-            file1.c     -       var2    variable        int
-            file1.c     func2   local2  variable        char *
-            file2.c     -       var1    variable        char *
-            ''').strip().splitlines()
-        lines = [re.sub(r'\s+', '\t', line, 4) for line in lines]
-        self._return_read_tsv = [tuple(v.strip() for v in line.split('\t'))
-                                 for line in lines[1:]]
-
-        known = from_file('spam.c', _read_tsv=self._read_tsv)
-
-        self.assertEqual(known, {
-            'variables': {v.id: v for v in [
-                Variable.from_parts('file1.c', '', 'var1', 'static int'),
-                Variable.from_parts('file1.c', 'func1', 'local1', 'static int'),
-                Variable.from_parts('file1.c', '', 'var2', 'int'),
-                Variable.from_parts('file1.c', 'func2', 'local2', 'char *'),
-                Variable.from_parts('file2.c', '', 'var1', 'char *'),
-                ]},
-            })
-        self.assertEqual(self.calls, [
-            ('_read_tsv', ('spam.c', 'filename\tfuncname\tname\tkind\tdeclaration')),
-            ])
-
-    def test_empty(self):
-        self._return_read_tsv = []
-
-        known = from_file('spam.c', _read_tsv=self._read_tsv)
-
-        self.assertEqual(known, {
-            'variables': {},
-            })
-        self.assertEqual(self.calls, [
-            ('_read_tsv', ('spam.c', 'filename\tfuncname\tname\tkind\tdeclaration')),
-            ])
diff --git a/Lib/test/test_tools/test_c_analyzer/test_c_globals/test_find.py b/Lib/test/test_tools/test_c_analyzer/test_c_globals/test_find.py
deleted file mode 100644 (file)
index 8288992..0000000
+++ /dev/null
@@ -1,335 +0,0 @@
-import unittest
-
-from .. import tool_imports_for_tests
-with tool_imports_for_tests():
-    from c_parser import info
-    from c_globals.find import globals_from_binary, globals
-
-
-class _Base(unittest.TestCase):
-
-    maxDiff = None
-
-    @property
-    def calls(self):
-        try:
-            return self._calls
-        except AttributeError:
-            self._calls = []
-            return self._calls
-
-
-class StaticsFromBinaryTests(_Base):
-
-    _return_iter_symbols = ()
-    _return_resolve_symbols = ()
-    _return_get_symbol_resolver = None
-
-    def setUp(self):
-        super().setUp()
-
-        self.kwargs = dict(
-                _iter_symbols=self._iter_symbols,
-                _resolve=self._resolve_symbols,
-                _get_symbol_resolver=self._get_symbol_resolver,
-                )
-
-    def _iter_symbols(self, binfile, find_local_symbol):
-        self.calls.append(('_iter_symbols', (binfile, find_local_symbol)))
-        return self._return_iter_symbols
-
-    def _resolve_symbols(self, symbols, resolve):
-        self.calls.append(('_resolve_symbols', (symbols, resolve,)))
-        return self._return_resolve_symbols
-
-    def _get_symbol_resolver(self, knownvars, dirnames=None):
-        self.calls.append(('_get_symbol_resolver', (knownvars, dirnames)))
-        return self._return_get_symbol_resolver
-
-    def test_typical(self):
-        symbols = self._return_iter_symbols = ()
-        resolver = self._return_get_symbol_resolver = object()
-        variables = self._return_resolve_symbols = [
-            info.Variable.from_parts('dir1/spam.c', None, 'var1', 'int'),
-            info.Variable.from_parts('dir1/spam.c', None, 'var2', 'static int'),
-            info.Variable.from_parts('dir1/spam.c', None, 'var3', 'char *'),
-            info.Variable.from_parts('dir1/spam.c', 'func2', 'var4', 'const char *'),
-            info.Variable.from_parts('dir1/eggs.c', None, 'var1', 'static int'),
-            info.Variable.from_parts('dir1/eggs.c', 'func1', 'var2', 'static char *'),
-            ]
-        knownvars = object()
-
-        found = list(globals_from_binary('python',
-                                         knownvars=knownvars,
-                                         **self.kwargs))
-
-        self.assertEqual(found, [
-            info.Variable.from_parts('dir1/spam.c', None, 'var1', 'int'),
-            info.Variable.from_parts('dir1/spam.c', None, 'var2', 'static int'),
-            info.Variable.from_parts('dir1/spam.c', None, 'var3', 'char *'),
-            info.Variable.from_parts('dir1/eggs.c', None, 'var1', 'static int'),
-            info.Variable.from_parts('dir1/eggs.c', 'func1', 'var2', 'static char *'),
-            ])
-        self.assertEqual(self.calls, [
-            ('_iter_symbols', ('python', None)),
-            ('_get_symbol_resolver', (knownvars, None)),
-            ('_resolve_symbols', (symbols, resolver)),
-            ])
-
-#        self._return_iter_symbols = [
-#                s_info.Symbol(('dir1/spam.c', None, 'var1'), 'variable', False),
-#                s_info.Symbol(('dir1/spam.c', None, 'var2'), 'variable', False),
-#                s_info.Symbol(('dir1/spam.c', None, 'func1'), 'function', False),
-#                s_info.Symbol(('dir1/spam.c', None, 'func2'), 'function', True),
-#                s_info.Symbol(('dir1/spam.c', None, 'var3'), 'variable', False),
-#                s_info.Symbol(('dir1/spam.c', 'func2', 'var4'), 'variable', False),
-#                s_info.Symbol(('dir1/ham.c', None, 'var1'), 'variable', True),
-#                s_info.Symbol(('dir1/eggs.c', None, 'var1'), 'variable', False),
-#                s_info.Symbol(('dir1/eggs.c', None, 'xyz'), 'other', False),
-#                s_info.Symbol(('dir1/eggs.c', '???', 'var2'), 'variable', False),
-#                s_info.Symbol(('???', None, 'var_x'), 'variable', False),
-#                s_info.Symbol(('???', '???', 'var_y'), 'variable', False),
-#                s_info.Symbol((None, None, '???'), 'other', False),
-#                ]
-#        known = object()
-#
-#        globals_from_binary('python', knownvars=known, **this.kwargs)
-#        found = list(globals_from_symbols(['dir1'], self.iter_symbols))
-#
-#        self.assertEqual(found, [
-#            info.Variable.from_parts('dir1/spam.c', None, 'var1', '???'),
-#            info.Variable.from_parts('dir1/spam.c', None, 'var2', '???'),
-#            info.Variable.from_parts('dir1/spam.c', None, 'var3', '???'),
-#            info.Variable.from_parts('dir1/spam.c', 'func2', 'var4', '???'),
-#            info.Variable.from_parts('dir1/eggs.c', None, 'var1', '???'),
-#            ])
-#        self.assertEqual(self.calls, [
-#            ('iter_symbols', (['dir1'],)),
-#            ])
-#
-#    def test_no_symbols(self):
-#        self._return_iter_symbols = []
-#
-#        found = list(globals_from_symbols(['dir1'], self.iter_symbols))
-#
-#        self.assertEqual(found, [])
-#        self.assertEqual(self.calls, [
-#            ('iter_symbols', (['dir1'],)),
-#            ])
-
-    # XXX need functional test
-
-
-#class StaticFromDeclarationsTests(_Base):
-#
-#    _return_iter_declarations = ()
-#
-#    def iter_declarations(self, dirnames):
-#        self.calls.append(('iter_declarations', (dirnames,)))
-#        return iter(self._return_iter_declarations)
-#
-#    def test_typical(self):
-#        self._return_iter_declarations = [
-#            None,
-#            info.Variable.from_parts('dir1/spam.c', None, 'var1', '???'),
-#            object(),
-#            info.Variable.from_parts('dir1/spam.c', None, 'var2', '???'),
-#            info.Variable.from_parts('dir1/spam.c', None, 'var3', '???'),
-#            object(),
-#            info.Variable.from_parts('dir1/spam.c', 'func2', 'var4', '???'),
-#            object(),
-#            info.Variable.from_parts('dir1/eggs.c', None, 'var1', '???'),
-#            object(),
-#            ]
-#
-#        found = list(globals_from_declarations(['dir1'], self.iter_declarations))
-#
-#        self.assertEqual(found, [
-#            info.Variable.from_parts('dir1/spam.c', None, 'var1', '???'),
-#            info.Variable.from_parts('dir1/spam.c', None, 'var2', '???'),
-#            info.Variable.from_parts('dir1/spam.c', None, 'var3', '???'),
-#            info.Variable.from_parts('dir1/spam.c', 'func2', 'var4', '???'),
-#            info.Variable.from_parts('dir1/eggs.c', None, 'var1', '???'),
-#            ])
-#        self.assertEqual(self.calls, [
-#            ('iter_declarations', (['dir1'],)),
-#            ])
-#
-#    def test_no_declarations(self):
-#        self._return_iter_declarations = []
-#
-#        found = list(globals_from_declarations(['dir1'], self.iter_declarations))
-#
-#        self.assertEqual(found, [])
-#        self.assertEqual(self.calls, [
-#            ('iter_declarations', (['dir1'],)),
-#            ])
-
-
-#class IterVariablesTests(_Base):
-#
-#    _return_from_symbols = ()
-#    _return_from_declarations = ()
-#
-#    def _from_symbols(self, dirnames, iter_symbols):
-#        self.calls.append(('_from_symbols', (dirnames, iter_symbols)))
-#        return iter(self._return_from_symbols)
-#
-#    def _from_declarations(self, dirnames, iter_declarations):
-#        self.calls.append(('_from_declarations', (dirnames, iter_declarations)))
-#        return iter(self._return_from_declarations)
-#
-#    def test_typical(self):
-#        expected = [
-#            info.Variable.from_parts('dir1/spam.c', None, 'var1', '???'),
-#            info.Variable.from_parts('dir1/spam.c', None, 'var2', '???'),
-#            info.Variable.from_parts('dir1/spam.c', None, 'var3', '???'),
-#            info.Variable.from_parts('dir1/spam.c', 'func2', 'var4', '???'),
-#            info.Variable.from_parts('dir1/eggs.c', None, 'var1', '???'),
-#            ]
-#        self._return_from_symbols = expected
-#
-#        found = list(iter_variables(['dir1'],
-#                                  _from_symbols=self._from_symbols,
-#                                  _from_declarations=self._from_declarations))
-#
-#        self.assertEqual(found, expected)
-#        self.assertEqual(self.calls, [
-#            ('_from_symbols', (['dir1'], b_symbols.iter_symbols)),
-#            ])
-#
-#    def test_no_symbols(self):
-#        self._return_from_symbols = []
-#
-#        found = list(iter_variables(['dir1'],
-#                                  _from_symbols=self._from_symbols,
-#                                  _from_declarations=self._from_declarations))
-#
-#        self.assertEqual(found, [])
-#        self.assertEqual(self.calls, [
-#            ('_from_symbols', (['dir1'], b_symbols.iter_symbols)),
-#            ])
-#
-#    def test_from_binary(self):
-#        expected = [
-#            info.Variable.from_parts('dir1/spam.c', None, 'var1', '???'),
-#            info.Variable.from_parts('dir1/spam.c', None, 'var2', '???'),
-#            info.Variable.from_parts('dir1/spam.c', None, 'var3', '???'),
-#            info.Variable.from_parts('dir1/spam.c', 'func2', 'var4', '???'),
-#            info.Variable.from_parts('dir1/eggs.c', None, 'var1', '???'),
-#            ]
-#        self._return_from_symbols = expected
-#
-#        found = list(iter_variables(['dir1'], 'platform',
-#                                  _from_symbols=self._from_symbols,
-#                                  _from_declarations=self._from_declarations))
-#
-#        self.assertEqual(found, expected)
-#        self.assertEqual(self.calls, [
-#            ('_from_symbols', (['dir1'], b_symbols.iter_symbols)),
-#            ])
-#
-#    def test_from_symbols(self):
-#        expected = [
-#            info.Variable.from_parts('dir1/spam.c', None, 'var1', '???'),
-#            info.Variable.from_parts('dir1/spam.c', None, 'var2', '???'),
-#            info.Variable.from_parts('dir1/spam.c', None, 'var3', '???'),
-#            info.Variable.from_parts('dir1/spam.c', 'func2', 'var4', '???'),
-#            info.Variable.from_parts('dir1/eggs.c', None, 'var1', '???'),
-#            ]
-#        self._return_from_symbols = expected
-#
-#        found = list(iter_variables(['dir1'], 'symbols',
-#                                  _from_symbols=self._from_symbols,
-#                                  _from_declarations=self._from_declarations))
-#
-#        self.assertEqual(found, expected)
-#        self.assertEqual(self.calls, [
-#            ('_from_symbols', (['dir1'], s_symbols.iter_symbols)),
-#            ])
-#
-#    def test_from_declarations(self):
-#        expected = [
-#            info.Variable.from_parts('dir1/spam.c', None, 'var1', '???'),
-#            info.Variable.from_parts('dir1/spam.c', None, 'var2', '???'),
-#            info.Variable.from_parts('dir1/spam.c', None, 'var3', '???'),
-#            info.Variable.from_parts('dir1/spam.c', 'func2', 'var4', '???'),
-#            info.Variable.from_parts('dir1/eggs.c', None, 'var1', '???'),
-#            ]
-#        self._return_from_declarations = expected
-#
-#        found = list(iter_variables(['dir1'], 'declarations',
-#                                  _from_symbols=self._from_symbols,
-#                                  _from_declarations=self._from_declarations))
-#
-#        self.assertEqual(found, expected)
-#        self.assertEqual(self.calls, [
-#            ('_from_declarations', (['dir1'], declarations.iter_all)),
-#            ])
-#
-#    def test_from_preprocessed(self):
-#        expected = [
-#            info.Variable.from_parts('dir1/spam.c', None, 'var1', '???'),
-#            info.Variable.from_parts('dir1/spam.c', None, 'var2', '???'),
-#            info.Variable.from_parts('dir1/spam.c', None, 'var3', '???'),
-#            info.Variable.from_parts('dir1/spam.c', 'func2', 'var4', '???'),
-#            info.Variable.from_parts('dir1/eggs.c', None, 'var1', '???'),
-#            ]
-#        self._return_from_declarations = expected
-#
-#        found = list(iter_variables(['dir1'], 'preprocessed',
-#                                  _from_symbols=self._from_symbols,
-#                                  _from_declarations=self._from_declarations))
-#
-#        self.assertEqual(found, expected)
-#        self.assertEqual(self.calls, [
-#            ('_from_declarations', (['dir1'], declarations.iter_preprocessed)),
-#            ])
-
-
-class StaticsTest(_Base):
-
-    _return_iter_variables = None
-
-    def _iter_variables(self, kind, *, known, dirnames):
-        self.calls.append(
-                ('_iter_variables', (kind, known, dirnames)))
-        return iter(self._return_iter_variables or ())
-
-    def test_typical(self):
-        self._return_iter_variables = [
-            info.Variable.from_parts('src1/spam.c', None, 'var1', 'static const char *'),
-            info.Variable.from_parts('src1/spam.c', None, 'var1b', 'const char *'),
-            info.Variable.from_parts('src1/spam.c', 'ham', 'initialized', 'static int'),
-            info.Variable.from_parts('src1/spam.c', 'ham', 'result', 'int'),  # skipped
-            info.Variable.from_parts('src1/spam.c', None, 'var2', 'static PyObject *'),
-            info.Variable.from_parts('src1/eggs.c', 'tofu', 'ready', 'static int'),
-            info.Variable.from_parts('src1/spam.c', None, 'freelist', 'static (PyTupleObject *)[10]'),
-            info.Variable.from_parts('src1/sub/ham.c', None, 'var1', 'static const char const *'),
-            info.Variable.from_parts('src2/jam.c', None, 'var1', 'static int'),
-            info.Variable.from_parts('src2/jam.c', None, 'var2', 'static MyObject *'),
-            info.Variable.from_parts('Include/spam.h', None, 'data', 'static const int'),
-            ]
-        dirnames = object()
-        known = object()
-
-        found = list(globals(dirnames, known,
-                             kind='platform',
-                             _iter_variables=self._iter_variables,
-                             ))
-
-        self.assertEqual(found, [
-            info.Variable.from_parts('src1/spam.c', None, 'var1', 'static const char *'),
-            info.Variable.from_parts('src1/spam.c', None, 'var1b', 'const char *'),
-            info.Variable.from_parts('src1/spam.c', 'ham', 'initialized', 'static int'),
-            info.Variable.from_parts('src1/spam.c', None, 'var2', 'static PyObject *'),
-            info.Variable.from_parts('src1/eggs.c', 'tofu', 'ready', 'static int'),
-            info.Variable.from_parts('src1/spam.c', None, 'freelist', 'static (PyTupleObject *)[10]'),
-            info.Variable.from_parts('src1/sub/ham.c', None, 'var1', 'static const char const *'),
-            info.Variable.from_parts('src2/jam.c', None, 'var1', 'static int'),
-            info.Variable.from_parts('src2/jam.c', None, 'var2', 'static MyObject *'),
-            info.Variable.from_parts('Include/spam.h', None, 'data', 'static const int'),
-            ])
-        self.assertEqual(self.calls, [
-            ('_iter_variables', ('platform', known, dirnames)),
-            ])
similarity index 99%
rename from Lib/test/test_tools/test_c_analyzer/test_c_analyzer_common/test_files.py
rename to Lib/test/test_tools/test_c_analyzer/test_common/test_files.py
index 6d14aea78a486e062f209a6d16fa5f377c086e77..0c97d2a0bbf9adee72bb80d8b5522a0404976fa2 100644 (file)
@@ -3,7 +3,7 @@ import unittest
 
 from .. import tool_imports_for_tests
 with tool_imports_for_tests():
-    from c_analyzer_common.files import (
+    from c_analyzer.common.files import (
             iter_files, _walk_tree, glob_tree,
             )
 
similarity index 98%
rename from Lib/test/test_tools/test_c_analyzer/test_c_analyzer_common/test_info.py
rename to Lib/test/test_tools/test_c_analyzer/test_common/test_info.py
index 2d386713b9989c18d6be5802130a07d385dab02e..69dbb582c6b684d27e66aa809e478dcb1ef5018c 100644 (file)
@@ -4,7 +4,10 @@ import unittest
 from ..util import PseudoStr, StrProxy, Object
 from .. import tool_imports_for_tests
 with tool_imports_for_tests():
-    from c_analyzer_common.info import ID
+    from c_analyzer.common.info import (
+            UNKNOWN,
+            ID,
+            )
 
 
 class IDTests(unittest.TestCase):
similarity index 94%
rename from Lib/test/test_tools/test_c_analyzer/test_c_globals/test_show.py
rename to Lib/test/test_tools/test_c_analyzer/test_common/test_show.py
index ce1dad85db1b870c7a19f638bbae76737166f407..91ca2f3b344dd39d81f72b536e6bab1ded4a4527 100644 (file)
@@ -2,8 +2,10 @@ import unittest
 
 from .. import tool_imports_for_tests
 with tool_imports_for_tests():
-    from c_parser import info
-    from c_globals.show import basic
+    from c_analyzer.variables import info
+    from c_analyzer.common.show import (
+            basic,
+            )
 
 
 TYPICAL = [
similarity index 79%
rename from Lib/test/test_tools/test_c_analyzer/test_c_globals/test___main__.py
rename to Lib/test/test_tools/test_c_analyzer/test_cpython/test___main__.py
index 5f52c588d7c8bd91f57fa5fc677f8a73070785e9..6d69ed7525b595d9fee59f1198ad349f091d1e69 100644 (file)
@@ -3,12 +3,13 @@ import unittest
 
 from .. import tool_imports_for_tests
 with tool_imports_for_tests():
-    from c_analyzer_common import SOURCE_DIRS
-    from c_analyzer_common.known import DATA_FILE as KNOWN_FILE
-    from c_parser import info
-    import c_globals as cg
-    from c_globals.supported import IGNORED_FILE
-    from c_globals.__main__ import cmd_check, cmd_show, parse_args, main
+    from c_analyzer.variables import info
+    from cpython import SOURCE_DIRS
+    from cpython.supported import IGNORED_FILE
+    from cpython.known import DATA_FILE as KNOWN_FILE
+    from cpython.__main__ import (
+            cmd_check, cmd_show, parse_args, main,
+            )
 
 
 TYPICAL = [
@@ -46,6 +47,8 @@ class CMDBase(unittest.TestCase):
 
     maxDiff = None
 
+#    _return_known_from_file = None
+#    _return_ignored_from_file = None
     _return_find = ()
 
     @property
@@ -56,8 +59,16 @@ class CMDBase(unittest.TestCase):
             self._calls = []
             return self._calls
 
-    def _find(self, *args):
-        self.calls.append(('_find', args))
+#    def _known_from_file(self, *args):
+#        self.calls.append(('_known_from_file', args))
+#        return self._return_known_from_file or {}
+#
+#    def _ignored_from_file(self, *args):
+#        self.calls.append(('_ignored_from_file', args))
+#        return self._return_ignored_from_file or {}
+
+    def _find(self, known, ignored, skip_objects=False):
+        self.calls.append(('_find', (known, ignored, skip_objects)))
         return self._return_find
 
     def _show(self, *args):
@@ -78,41 +89,35 @@ class CheckTests(CMDBase):
                   _print=self._print,
                   )
 
-        self.assertEqual(self.calls[0], (
-            '_find', (
-                SOURCE_DIRS,
-                KNOWN_FILE,
-                IGNORED_FILE,
-                ),
-            ))
+        self.assertEqual(
+                self.calls[0],
+                ('_find', (KNOWN_FILE, IGNORED_FILE, False)),
+                )
 
     def test_all_supported(self):
         self._return_find = [(v, s) for v, s in TYPICAL if s]
         dirs = ['src1', 'src2', 'Include']
 
         cmd_check('check',
-                 dirs,
-                 ignored='ignored.tsv',
-                 known='known.tsv',
-                 _find=self._find,
-                 _show=self._show,
-                 _print=self._print,
-                 )
+                  known='known.tsv',
+                  ignored='ignored.tsv',
+                  _find=self._find,
+                  _show=self._show,
+                  _print=self._print,
+                  )
 
         self.assertEqual(self.calls, [
-            ('_find', (dirs, 'known.tsv', 'ignored.tsv')),
+            ('_find', ('known.tsv', 'ignored.tsv', False)),
             #('_print', ('okay',)),
             ])
 
     def test_some_unsupported(self):
         self._return_find = TYPICAL
-        dirs = ['src1', 'src2', 'Include']
 
         with self.assertRaises(SystemExit) as cm:
             cmd_check('check',
-                      dirs,
-                      ignored='ignored.tsv',
                       known='known.tsv',
+                      ignored='ignored.tsv',
                       _find=self._find,
                       _show=self._show,
                       _print=self._print,
@@ -120,7 +125,7 @@ class CheckTests(CMDBase):
 
         unsupported = [v for v, s in TYPICAL if not s]
         self.assertEqual(self.calls, [
-            ('_find', (dirs, 'known.tsv', 'ignored.tsv')),
+            ('_find', ('known.tsv', 'ignored.tsv', False)),
             ('_print', ('ERROR: found unsupported global variables',)),
             ('_print', ()),
             ('_show', (sorted(unsupported),)),
@@ -140,20 +145,15 @@ class ShowTests(CMDBase):
                  _print=self._print,
                  )
 
-        self.assertEqual(self.calls[0], (
-            '_find', (
-                SOURCE_DIRS,
-                KNOWN_FILE,
-                IGNORED_FILE,
-                ),
-            ))
+        self.assertEqual(
+                self.calls[0],
+                ('_find', (KNOWN_FILE, IGNORED_FILE, False)),
+                )
 
     def test_typical(self):
         self._return_find = TYPICAL
-        dirs = ['src1', 'src2', 'Include']
 
         cmd_show('show',
-                 dirs,
                  known='known.tsv',
                  ignored='ignored.tsv',
                  _find=self._find,
@@ -164,7 +164,7 @@ class ShowTests(CMDBase):
         supported = [v for v, s in TYPICAL if s]
         unsupported = [v for v, s in TYPICAL if not s]
         self.assertEqual(self.calls, [
-            ('_find', (dirs, 'known.tsv', 'ignored.tsv')),
+            ('_find', ('known.tsv', 'ignored.tsv', False)),
             ('_print', ('supported:',)),
             ('_print', ('----------',)),
             ('_show', (sorted(supported),)),
@@ -201,7 +201,7 @@ class ParseArgsTests(unittest.TestCase):
         self.assertEqual(cmdkwargs, {
             'ignored': IGNORED_FILE,
             'known': KNOWN_FILE,
-            'dirs': SOURCE_DIRS,
+            #'dirs': SOURCE_DIRS,
             })
 
     def test_check_full_args(self):
@@ -209,16 +209,16 @@ class ParseArgsTests(unittest.TestCase):
             'check',
             '--ignored', 'spam.tsv',
             '--known', 'eggs.tsv',
-            'dir1',
-            'dir2',
-            'dir3',
+            #'dir1',
+            #'dir2',
+            #'dir3',
             ])
 
         self.assertEqual(cmd, 'check')
         self.assertEqual(cmdkwargs, {
             'ignored': 'spam.tsv',
             'known': 'eggs.tsv',
-            'dirs': ['dir1', 'dir2', 'dir3']
+            #'dirs': ['dir1', 'dir2', 'dir3']
             })
 
     def test_show_no_args(self):
@@ -230,7 +230,7 @@ class ParseArgsTests(unittest.TestCase):
         self.assertEqual(cmdkwargs, {
             'ignored': IGNORED_FILE,
             'known': KNOWN_FILE,
-            'dirs': SOURCE_DIRS,
+            #'dirs': SOURCE_DIRS,
             'skip_objects': False,
             })
 
@@ -239,16 +239,16 @@ class ParseArgsTests(unittest.TestCase):
             'show',
             '--ignored', 'spam.tsv',
             '--known', 'eggs.tsv',
-            'dir1',
-            'dir2',
-            'dir3',
+            #'dir1',
+            #'dir2',
+            #'dir3',
             ])
 
         self.assertEqual(cmd, 'show')
         self.assertEqual(cmdkwargs, {
             'ignored': 'spam.tsv',
             'known': 'eggs.tsv',
-            'dirs': ['dir1', 'dir2', 'dir3'],
+            #'dirs': ['dir1', 'dir2', 'dir3'],
             'skip_objects': False,
             })
 
similarity index 85%
rename from Lib/test/test_tools/test_c_analyzer/test_c_globals/test_supported.py
rename to Lib/test/test_tools/test_c_analyzer/test_cpython/test_supported.py
index 1e7d40e2afcbda3e4c574618dfee88f03a286581..a244b97e1fc7c7ba272be312c3e94da4f2d01c1f 100644 (file)
@@ -4,9 +4,11 @@ import unittest
 
 from .. import tool_imports_for_tests
 with tool_imports_for_tests():
-    from c_analyzer_common.info import ID
-    from c_parser import info
-    from c_globals.supported import is_supported, ignored_from_file
+    from c_analyzer.common.info import ID
+    from c_analyzer.variables.info import Variable
+    from cpython.supported import (
+            is_supported, ignored_from_file,
+            )
 
 
 class IsSupportedTests(unittest.TestCase):
@@ -14,8 +16,8 @@ class IsSupportedTests(unittest.TestCase):
     @unittest.expectedFailure
     def test_supported(self):
         statics = [
-                info.StaticVar('src1/spam.c', None, 'var1', 'const char *'),
-                info.StaticVar('src1/spam.c', None, 'var1', 'int'),
+                Variable('src1/spam.c', None, 'var1', 'const char *'),
+                Variable('src1/spam.c', None, 'var1', 'int'),
                 ]
         for static in statics:
             with self.subTest(static):
@@ -26,8 +28,8 @@ class IsSupportedTests(unittest.TestCase):
     @unittest.expectedFailure
     def test_not_supported(self):
         statics = [
-                info.StaticVar('src1/spam.c', None, 'var1', 'PyObject *'),
-                info.StaticVar('src1/spam.c', None, 'var1', 'PyObject[10]'),
+                Variable('src1/spam.c', None, 'var1', 'PyObject *'),
+                Variable('src1/spam.c', None, 'var1', 'PyObject[10]'),
                 ]
         for static in statics:
             with self.subTest(static):
similarity index 99%
rename from Lib/test/test_tools/test_c_analyzer/test_c_parser/test_declarations.py
rename to Lib/test/test_tools/test_c_analyzer/test_parser/test_declarations.py
index b68744ef0aba49d8edf8c43bf7859510186325c2..674fcb1af1c7adf924f646bd8f4dba62abe1713f 100644 (file)
@@ -3,9 +3,9 @@ import unittest
 
 from .. import tool_imports_for_tests
 with tool_imports_for_tests():
-    from c_parser.declarations import (
+    from c_analyzer.parser.declarations import (
         iter_global_declarations, iter_local_statements,
-        parse_func, parse_var, parse_compound,
+        parse_func, _parse_var, parse_compound,
         iter_variables,
         )
 
@@ -515,7 +515,7 @@ class ParseVarTests(TestCaseBase):
             ])
         for stmt, expected in tests:
             with self.subTest(stmt):
-                name, vartype = parse_var(stmt)
+                name, vartype = _parse_var(stmt)
 
                 self.assertEqual((name, vartype), expected)
 
similarity index 99%
rename from Lib/test/test_tools/test_c_analyzer/test_c_parser/test_preprocessor.py
rename to Lib/test/test_tools/test_c_analyzer/test_parser/test_preprocessor.py
index 89e15570d6530929c0131fa5cc25422e66d2fdf4..56a1c9c612f72614673ce33f554431d9e0194483 100644 (file)
@@ -6,7 +6,7 @@ import sys
 from ..util import wrapped_arg_combos, StrProxy
 from .. import tool_imports_for_tests
 with tool_imports_for_tests():
-    from c_parser.preprocessor import (
+    from c_analyzer.parser.preprocessor import (
         iter_lines,
         # directives
         parse_directive, PreprocessorDirective,
similarity index 98%
rename from Lib/test/test_tools/test_c_analyzer/test_c_symbols/test_info.py
rename to Lib/test/test_tools/test_c_analyzer/test_symbols/test_info.py
index e029dcf66124dc61a12b815f996ef25b9b439fa1..1282a89718c820045658728ccb96ba5e9b9a6958 100644 (file)
@@ -4,8 +4,8 @@ import unittest
 from ..util import PseudoStr, StrProxy, Object
 from .. import tool_imports_for_tests
 with tool_imports_for_tests():
-    from c_analyzer_common.info import ID
-    from c_symbols.info import Symbol
+    from c_analyzer.common.info import ID
+    from c_analyzer.symbols.info import Symbol
 
 
 class SymbolTests(unittest.TestCase):
diff --git a/Lib/test/test_tools/test_c_analyzer/test_variables/__init__.py b/Lib/test/test_tools/test_c_analyzer/test_variables/__init__.py
new file mode 100644 (file)
index 0000000..bc502ef
--- /dev/null
@@ -0,0 +1,6 @@
+import os.path
+from test.support import load_package_tests
+
+
+def load_tests(*args):
+    return load_package_tests(os.path.dirname(__file__), *args)
diff --git a/Lib/test/test_tools/test_c_analyzer/test_variables/test_find.py b/Lib/test/test_tools/test_c_analyzer/test_variables/test_find.py
new file mode 100644 (file)
index 0000000..7a13cf3
--- /dev/null
@@ -0,0 +1,124 @@
+import unittest
+
+from .. import tool_imports_for_tests
+with tool_imports_for_tests():
+    from c_analyzer.variables import info
+    from c_analyzer.variables.find import (
+            vars_from_binary,
+            )
+
+
+class _Base(unittest.TestCase):
+
+    maxDiff = None
+
+    @property
+    def calls(self):
+        try:
+            return self._calls
+        except AttributeError:
+            self._calls = []
+            return self._calls
+
+
+class VarsFromBinaryTests(_Base):
+
+    _return_iter_vars = ()
+    _return_get_symbol_resolver = None
+
+    def setUp(self):
+        super().setUp()
+
+        self.kwargs = dict(
+                _iter_vars=self._iter_vars,
+                _get_symbol_resolver=self._get_symbol_resolver,
+                )
+
+    def _iter_vars(self, binfile, resolve, handle_id):
+        self.calls.append(('_iter_vars', (binfile, resolve, handle_id)))
+        return [(v, v.id) for v in self._return_iter_vars]
+
+    def _get_symbol_resolver(self, known=None, dirnames=(), *,
+                             handle_var,
+                             filenames=None,
+                             check_filename=None,
+                             perfilecache=None,
+                             ):
+        self.calls.append(('_get_symbol_resolver',
+                           (known, dirnames, handle_var, filenames,
+                            check_filename, perfilecache)))
+        return self._return_get_symbol_resolver
+
+    def test_typical(self):
+        resolver = self._return_get_symbol_resolver = object()
+        variables = self._return_iter_vars = [
+            info.Variable.from_parts('dir1/spam.c', None, 'var1', 'int'),
+            info.Variable.from_parts('dir1/spam.c', None, 'var2', 'static int'),
+            info.Variable.from_parts('dir1/spam.c', None, 'var3', 'char *'),
+            info.Variable.from_parts('dir1/spam.c', 'func2', 'var4', 'const char *'),
+            info.Variable.from_parts('dir1/eggs.c', None, 'var1', 'static int'),
+            info.Variable.from_parts('dir1/eggs.c', 'func1', 'var2', 'static char *'),
+            ]
+        known = object()
+        filenames = object()
+
+        found = list(vars_from_binary('python',
+                                      known=known,
+                                      filenames=filenames,
+                                      **self.kwargs))
+
+        self.assertEqual(found, [
+            info.Variable.from_parts('dir1/spam.c', None, 'var1', 'int'),
+            info.Variable.from_parts('dir1/spam.c', None, 'var2', 'static int'),
+            info.Variable.from_parts('dir1/spam.c', None, 'var3', 'char *'),
+            info.Variable.from_parts('dir1/spam.c', 'func2', 'var4', 'const char *'),
+            info.Variable.from_parts('dir1/eggs.c', None, 'var1', 'static int'),
+            info.Variable.from_parts('dir1/eggs.c', 'func1', 'var2', 'static char *'),
+            ])
+        self.assertEqual(self.calls, [
+            ('_get_symbol_resolver', (filenames, known, info.Variable.from_id, None, None, {})),
+            ('_iter_vars', ('python', resolver, None)),
+            ])
+
+#        self._return_iter_symbols = [
+#                s_info.Symbol(('dir1/spam.c', None, 'var1'), 'variable', False),
+#                s_info.Symbol(('dir1/spam.c', None, 'var2'), 'variable', False),
+#                s_info.Symbol(('dir1/spam.c', None, 'func1'), 'function', False),
+#                s_info.Symbol(('dir1/spam.c', None, 'func2'), 'function', True),
+#                s_info.Symbol(('dir1/spam.c', None, 'var3'), 'variable', False),
+#                s_info.Symbol(('dir1/spam.c', 'func2', 'var4'), 'variable', False),
+#                s_info.Symbol(('dir1/ham.c', None, 'var1'), 'variable', True),
+#                s_info.Symbol(('dir1/eggs.c', None, 'var1'), 'variable', False),
+#                s_info.Symbol(('dir1/eggs.c', None, 'xyz'), 'other', False),
+#                s_info.Symbol(('dir1/eggs.c', '???', 'var2'), 'variable', False),
+#                s_info.Symbol(('???', None, 'var_x'), 'variable', False),
+#                s_info.Symbol(('???', '???', 'var_y'), 'variable', False),
+#                s_info.Symbol((None, None, '???'), 'other', False),
+#                ]
+#        known = object()
+#
+#        vars_from_binary('python', knownvars=known, **this.kwargs)
+#        found = list(globals_from_symbols(['dir1'], self.iter_symbols))
+#
+#        self.assertEqual(found, [
+#            info.Variable.from_parts('dir1/spam.c', None, 'var1', '???'),
+#            info.Variable.from_parts('dir1/spam.c', None, 'var2', '???'),
+#            info.Variable.from_parts('dir1/spam.c', None, 'var3', '???'),
+#            info.Variable.from_parts('dir1/spam.c', 'func2', 'var4', '???'),
+#            info.Variable.from_parts('dir1/eggs.c', None, 'var1', '???'),
+#            ])
+#        self.assertEqual(self.calls, [
+#            ('iter_symbols', (['dir1'],)),
+#            ])
+#
+#    def test_no_symbols(self):
+#        self._return_iter_symbols = []
+#
+#        found = list(globals_from_symbols(['dir1'], self.iter_symbols))
+#
+#        self.assertEqual(found, [])
+#        self.assertEqual(self.calls, [
+#            ('iter_symbols', (['dir1'],)),
+#            ])
+
+    # XXX need functional test
similarity index 98%
rename from Lib/test/test_tools/test_c_analyzer/test_c_parser/test_info.py
rename to Lib/test/test_tools/test_c_analyzer/test_variables/test_info.py
index d1a966c58904db705ded87b9c081ee87eeede7e9..d424d8eebb811149354a464bc4e09afbb54b48f4 100644 (file)
@@ -4,10 +4,10 @@ import unittest
 from ..util import PseudoStr, StrProxy, Object
 from .. import tool_imports_for_tests
 with tool_imports_for_tests():
-    from c_analyzer_common.info import ID, UNKNOWN
-    from c_parser.info import (
-        normalize_vartype, Variable,
-        )
+    from c_analyzer.common.info import UNKNOWN, ID
+    from c_analyzer.variables.info import (
+            normalize_vartype, Variable
+            )
 
 
 class NormalizeVartypeTests(unittest.TestCase):
diff --git a/Lib/test/test_tools/test_c_analyzer/test_variables/test_known.py b/Lib/test/test_tools/test_c_analyzer/test_variables/test_known.py
new file mode 100644 (file)
index 0000000..49ff45c
--- /dev/null
@@ -0,0 +1,139 @@
+import re
+import textwrap
+import unittest
+
+from .. import tool_imports_for_tests
+with tool_imports_for_tests():
+    from c_analyzer.common.info import ID
+    from c_analyzer.variables.info import Variable
+    from c_analyzer.variables.known import (
+            read_file,
+            from_file,
+            )
+
+class _BaseTests(unittest.TestCase):
+
+    maxDiff = None
+
+    @property
+    def calls(self):
+        try:
+            return self._calls
+        except AttributeError:
+            self._calls = []
+            return self._calls
+
+
+class ReadFileTests(_BaseTests):
+
+    _return_read_tsv = ()
+
+    def _read_tsv(self, *args):
+        self.calls.append(('_read_tsv', args))
+        return self._return_read_tsv
+
+    def test_typical(self):
+        lines = textwrap.dedent('''
+            filename    funcname        name    kind    declaration
+            file1.c     -       var1    variable        static int
+            file1.c     func1   local1  variable        static int
+            file1.c     -       var2    variable        int
+            file1.c     func2   local2  variable        char *
+            file2.c     -       var1    variable        char *
+            ''').strip().splitlines()
+        lines = [re.sub(r'\s+', '\t', line, 4) for line in lines]
+        self._return_read_tsv = [tuple(v.strip() for v in line.split('\t'))
+                                 for line in lines[1:]]
+
+        known = list(read_file('known.tsv', _read_tsv=self._read_tsv))
+
+        self.assertEqual(known, [
+            ('variable', ID('file1.c', '', 'var1'), 'static int'),
+            ('variable', ID('file1.c', 'func1', 'local1'), 'static int'),
+            ('variable', ID('file1.c', '', 'var2'), 'int'),
+            ('variable', ID('file1.c', 'func2', 'local2'), 'char *'),
+            ('variable', ID('file2.c', '', 'var1'), 'char *'),
+            ])
+        self.assertEqual(self.calls, [
+            ('_read_tsv',
+             ('known.tsv', 'filename\tfuncname\tname\tkind\tdeclaration')),
+            ])
+
+    def test_empty(self):
+        self._return_read_tsv = []
+
+        known = list(read_file('known.tsv', _read_tsv=self._read_tsv))
+
+        self.assertEqual(known, [])
+        self.assertEqual(self.calls, [
+            ('_read_tsv', ('known.tsv', 'filename\tfuncname\tname\tkind\tdeclaration')),
+            ])
+
+
+class FromFileTests(_BaseTests):
+
+    _return_read_file = ()
+    _return_handle_var = ()
+
+    def _read_file(self, infile):
+        self.calls.append(('_read_file', (infile,)))
+        return iter(self._return_read_file)
+
+    def _handle_var(self, varid, decl):
+        self.calls.append(('_handle_var', (varid, decl)))
+        var = self._return_handle_var.pop(0)
+        return var
+
+    def test_typical(self):
+        expected = [
+            Variable.from_parts('file1.c', '', 'var1', 'static int'),
+            Variable.from_parts('file1.c', 'func1', 'local1', 'static int'),
+            Variable.from_parts('file1.c', '', 'var2', 'int'),
+            Variable.from_parts('file1.c', 'func2', 'local2', 'char *'),
+            Variable.from_parts('file2.c', '', 'var1', 'char *'),
+            ]
+        self._return_read_file = [('variable', v.id, v.vartype)
+                                  for v in expected]
+#            ('variable', ID('file1.c', '', 'var1'), 'static int'),
+#            ('variable', ID('file1.c', 'func1', 'local1'), 'static int'),
+#            ('variable', ID('file1.c', '', 'var2'), 'int'),
+#            ('variable', ID('file1.c', 'func2', 'local2'), 'char *'),
+#            ('variable', ID('file2.c', '', 'var1'), 'char *'),
+#            ]
+        self._return_handle_var = list(expected)  # a copy
+
+        known = from_file('known.tsv',
+                          handle_var=self._handle_var,
+                          _read_file=self._read_file,
+                          )
+
+        self.assertEqual(known, {
+            'variables': {v.id: v for v in expected},
+            })
+#                Variable.from_parts('file1.c', '', 'var1', 'static int'),
+#                Variable.from_parts('file1.c', 'func1', 'local1', 'static int'),
+#                Variable.from_parts('file1.c', '', 'var2', 'int'),
+#                Variable.from_parts('file1.c', 'func2', 'local2', 'char *'),
+#                Variable.from_parts('file2.c', '', 'var1', 'char *'),
+#                ]},
+#            })
+        self.assertEqual(self.calls, [
+            ('_read_file', ('known.tsv',)),
+            *[('_handle_var', (v.id, v.vartype))
+              for v in expected],
+            ])
+
+    def test_empty(self):
+        self._return_read_file = []
+
+        known = from_file('known.tsv',
+                          handle_var=self._handle_var,
+                          _read_file=self._read_file,
+                          )
+
+        self.assertEqual(known, {
+            'variables': {},
+            })
+        self.assertEqual(self.calls, [
+            ('_read_file', ('known.tsv',)),
+            ])
index 9afe059b28c6641f38efb3fc69e928781a065110..b36b791241d5396243ece0e9eae9e282b043ef35 100644 (file)
@@ -1,6 +1,6 @@
 # This is a script equivalent of running "python -m test.test_c_globals.cg".
 
-from c_globals.__main__ import parse_args, main
+from cpython.__main__ import parse_args, main
 
 
 # This is effectively copied from cg/__main__.py:
similarity index 82%
rename from Tools/c-analyzer/c_analyzer_common/files.py
rename to Tools/c-analyzer/c_analyzer/common/files.py
index b3cd16c8dc00807826cf6c55eb33d8812c8fa513..ab551a84bad15dcb6c1e9fc1c290d3ff92154f7a 100644 (file)
@@ -2,7 +2,10 @@ import glob
 import os
 import os.path
 
-from . import SOURCE_DIRS, REPO_ROOT
+# XXX need tests:
+# * walk_tree()
+# * glob_tree()
+# * iter_files_by_suffix()
 
 
 C_SOURCE_SUFFIXES = ('.c', '.h')
@@ -115,24 +118,3 @@ def iter_files_by_suffix(root, suffixes, relparent=None, *,
     # XXX Ignore repeated suffixes?
     for suffix in suffixes:
         yield from _iter_files(root, suffix, relparent)
-
-
-def iter_cpython_files(*,
-                       walk=walk_tree,
-                       _files=iter_files_by_suffix,
-                       ):
-    """Yield each file in the tree for each of the given directory names."""
-    excludedtrees = [
-        os.path.join('Include', 'cpython', ''),
-        ]
-    def is_excluded(filename):
-        for root in excludedtrees:
-            if filename.startswith(root):
-                return True
-        return False
-    for filename in _files(SOURCE_DIRS, C_SOURCE_SUFFIXES, REPO_ROOT,
-                           walk=walk,
-                           ):
-        if is_excluded(filename):
-            continue
-        yield filename
diff --git a/Tools/c-analyzer/c_analyzer/common/info.py b/Tools/c-analyzer/c_analyzer/common/info.py
new file mode 100644 (file)
index 0000000..3f3f8c5
--- /dev/null
@@ -0,0 +1,138 @@
+from collections import namedtuple
+import re
+
+from .util import classonly, _NTBase
+
+# XXX need tests:
+# * ID.match()
+
+
+UNKNOWN = '???'
+
+NAME_RE = re.compile(r'^([a-zA-Z]|_\w*[a-zA-Z]\w*|[a-zA-Z]\w*)$')
+
+
+class ID(_NTBase, namedtuple('ID', 'filename funcname name')):
+    """A unique ID for a single symbol or declaration."""
+
+    __slots__ = ()
+    # XXX Add optional conditions (tuple of strings) field.
+    #conditions = Slot()
+
+    @classonly
+    def from_raw(cls, raw):
+        if not raw:
+            return None
+        if isinstance(raw, str):
+            return cls(None, None, raw)
+        try:
+            name, = raw
+            filename = None
+        except ValueError:
+            try:
+                filename, name = raw
+            except ValueError:
+                return super().from_raw(raw)
+        return cls(filename, None, name)
+
+    def __new__(cls, filename, funcname, name):
+        self = super().__new__(
+                cls,
+                filename=str(filename) if filename else None,
+                funcname=str(funcname) if funcname else None,
+                name=str(name) if name else None,
+                )
+        #cls.conditions.set(self, tuple(str(s) if s else None
+        #                               for s in conditions or ()))
+        return self
+
+    def validate(self):
+        """Fail if the object is invalid (i.e. init with bad data)."""
+        if not self.name:
+            raise TypeError('missing name')
+        else:
+            if not NAME_RE.match(self.name):
+                raise ValueError(
+                        f'name must be an identifier, got {self.name!r}')
+
+        # Symbols from a binary might not have filename/funcname info.
+
+        if self.funcname:
+            if not self.filename:
+                raise TypeError('missing filename')
+            if not NAME_RE.match(self.funcname) and self.funcname != UNKNOWN:
+                raise ValueError(
+                        f'funcname must be an identifier, got {self.funcname!r}')
+
+        # XXX Require the filename (at least UNKNOWN)?
+        # XXX Check the filename?
+
+    @property
+    def islocal(self):
+        return self.funcname is not None
+
+    def match(self, other, *,
+              match_files=(lambda f1, f2: f1 == f2),
+              ):
+        """Return True if the two match.
+
+        At least one of the two must be completely valid (no UNKNOWN
+        anywhere).  Otherwise False is returned.  The remaining one
+        *may* have UNKNOWN for both funcname and filename.  It must
+        have a valid name though.
+
+        The caller is responsible for knowing which of the two is valid
+        (and which to use if both are valid).
+        """
+        # First check the name.
+        if self.name is None:
+            return False
+        if other.name != self.name:
+            return False
+
+        # Then check the filename.
+        if self.filename is None:
+            return False
+        if other.filename is None:
+            return False
+        if self.filename == UNKNOWN:
+            # "other" must be the valid one.
+            if other.funcname == UNKNOWN:
+                return False
+            elif self.funcname != UNKNOWN:
+                # XXX Try matching funcname even though we don't
+                # know the filename?
+                raise NotImplementedError
+            else:
+                return True
+        elif other.filename == UNKNOWN:
+            # "self" must be the valid one.
+            if self.funcname == UNKNOWN:
+                return False
+            elif other.funcname != UNKNOWN:
+                # XXX Try matching funcname even though we don't
+                # know the filename?
+                raise NotImplementedError
+            else:
+                return True
+        elif not match_files(self.filename, other.filename):
+            return False
+
+        # Finally, check the funcname.
+        if self.funcname == UNKNOWN:
+            # "other" must be the valid one.
+            if other.funcname == UNKNOWN:
+                return False
+            else:
+                return other.funcname is not None
+        elif other.funcname == UNKNOWN:
+            # "self" must be the valid one.
+            if self.funcname == UNKNOWN:
+                return False
+            else:
+                return self.funcname is not None
+        elif self.funcname == other.funcname:
+            # Both are valid.
+            return True
+
+        return False
diff --git a/Tools/c-analyzer/c_analyzer/common/show.py b/Tools/c-analyzer/c_analyzer/common/show.py
new file mode 100644 (file)
index 0000000..5f3cb1c
--- /dev/null
@@ -0,0 +1,11 @@
+
+def basic(variables, *,
+          _print=print):
+    """Print each row simply."""
+    for var in variables:
+        if var.funcname:
+            line = f'{var.filename}:{var.funcname}():{var.name}'
+        else:
+            line = f'{var.filename}:{var.name}'
+        line = f'{line:<64} {var.vartype}'
+        _print(line)
similarity index 83%
rename from Tools/c-analyzer/c_parser/declarations.py
rename to Tools/c-analyzer/c_analyzer/parser/declarations.py
index 19fa3ff4e66bb13a2f89af5b1706d67fb0f6f9dd..f37072cccad8641aeee948e962242b27a90b857a 100644 (file)
@@ -2,6 +2,8 @@ import re
 import shlex
 import subprocess
 
+from ..common.info import UNKNOWN
+
 from . import source
 
 
@@ -194,7 +196,28 @@ def parse_func(stmt, body):
     return name, signature
 
 
-def parse_var(stmt):
+#TYPE_SPEC = rf'''(?:
+#        )'''
+#VAR_DECLARATOR = rf'''(?:
+#        )'''
+#VAR_DECL = rf'''(?:
+#            {TYPE_SPEC}+
+#            {VAR_DECLARATOR}
+#            \s*
+#        )'''
+#VAR_DECLARATION = rf'''(?:
+#            {VAR_DECL}
+#            (?: = [^=] [^;]* )?
+#            ;
+#        )'''
+#
+#
+#def parse_variable(decl, *, inFunc=False):
+#    """Return [(name, storage, vartype)] for the given variable declaration."""
+#    ...
+
+
+def _parse_var(stmt):
     """Return (name, vartype) for the given variable declaration."""
     stmt = stmt.rstrip(';')
     m = LOCAL_STMT_START_RE.match(stmt)
@@ -220,6 +243,27 @@ def parse_var(stmt):
     return name, vartype
 
 
+def extract_storage(decl, *, infunc=None):
+    """Return the storage class for the given declaration.
+
+    The default is "implicit" ("local" if infunc); UNKNOWN passes through.
+    """
+    if decl == UNKNOWN:
+        return decl
+    if decl.startswith('static '):
+        return 'static'
+        #return 'static', decl.partition(' ')[2].strip()
+    elif decl.startswith('extern '):
+        return 'extern'
+        #return 'extern', decl.partition(' ')[2].strip()
+    elif re.match(r'.*\b(static|extern)\b', decl):
+        raise NotImplementedError
+    elif infunc:
+        return 'local'
+    else:
+        return 'implicit'
+
+
 def parse_compound(stmt, blocks):
     """Return (headers, bodies) for the given compound statement."""
     # XXX Identify declarations inside compound statements
@@ -228,14 +272,17 @@ def parse_compound(stmt, blocks):
 
 
 def iter_variables(filename, *,
+                   preprocessed=False,
                    _iter_source_lines=source.iter_lines,
                    _iter_global=iter_global_declarations,
                    _iter_local=iter_local_statements,
                    _parse_func=parse_func,
-                   _parse_var=parse_var,
+                   _parse_var=_parse_var,
                    _parse_compound=parse_compound,
                    ):
     """Yield (funcname, name, vartype) for every variable in the given file."""
+    if preprocessed:
+        raise NotImplementedError
     lines = _iter_source_lines(filename)
     for stmt, body in _iter_global(lines):
         # At the file top-level we only have to worry about vars & funcs.
@@ -256,7 +303,7 @@ def iter_variables(filename, *,
 
 def _iter_locals(lines, *,
                  _iter_statements=iter_local_statements,
-                 _parse_var=parse_var,
+                 _parse_var=_parse_var,
                  _parse_compound=parse_compound,
                  ):
     compound = [lines]
@@ -278,18 +325,15 @@ def _iter_locals(lines, *,
                 compound.extend(bodies)
 
 
-def iter_all(dirnames):
+def iter_all(filename, *,
+             preprocessed=False,
+             ):
     """Yield a Declaration for each one found.
 
     If there are duplicates, due to preprocessor conditionals, then
     they are checked to make sure they are the same.
     """
-    raise NotImplementedError
-
-
-def iter_preprocessed(dirnames):
-    """Yield a Declaration for each one found.
-
-    All source files are run through the preprocessor first.
-    """
-    raise NotImplementedError
+    # XXX For the moment we cheat.
+    for funcname, name, decl in iter_variables(filename,
+                                               preprocessed=preprocessed):
+        yield 'variable', funcname, name, decl
diff --git a/Tools/c-analyzer/c_analyzer/parser/find.py b/Tools/c-analyzer/c_analyzer/parser/find.py
new file mode 100644 (file)
index 0000000..3860d3d
--- /dev/null
@@ -0,0 +1,107 @@
+from ..common.info import UNKNOWN, ID
+
+from . import declarations
+
+# XXX need tests:
+# * variables
+# * variable
+# * variable_from_id
+
+
+def _iter_vars(filenames, preprocessed, *,
+               handle_id=None,
+               _iter_decls=declarations.iter_all,
+               ):
+    if handle_id is None:
+        handle_id = ID
+
+    for filename in filenames or ():
+        for kind, funcname, name, decl in _iter_decls(filename,
+                                                      preprocessed=preprocessed,
+                                                      ):
+            if kind != 'variable':
+                continue
+            varid = handle_id(filename, funcname, name)
+            yield varid, decl
+
+
+# XXX Add a "handle_var" arg like we did for get_resolver()?
+
+def variables(*filenames,
+              perfilecache=None,
+              preprocessed=False,
+              known=None,  # for types
+              handle_id=None,
+              _iter_vars=_iter_vars,
+              ):
+    """Yield (varid, decl) for each variable found in the given files.
+
+    If "preprocessed" is provided (and not False/None) then it is used
+    to decide which tool to use to parse the source code after it runs
+    through the C preprocessor.  Otherwise the raw source is parsed.
+    """
+    if len(filenames) == 1 and not isinstance(filenames[0], str):
+        filenames, = filenames
+
+    if perfilecache is None:
+        yield from _iter_vars(filenames, preprocessed)
+    else:
+        # XXX Cache per-file variables (e.g. `{filename: [(varid, decl)]}`).
+        raise NotImplementedError
+
+
+def variable(name, filenames, *,
+             local=False,
+             perfilecache=None,
+             preprocessed=False,
+             handle_id=None,
+             _iter_vars=variables,
+             ):
+    """Return (varid, decl) for the first found variable that matches.
+
+    If "local" is True then the first matching local variable in the
+    file will always be returned.  To avoid that, pass perfilecache and
+    pop each variable from the cache after using it.
+    """
+    for varid, decl in _iter_vars(filenames,
+                                  perfilecache=perfilecache,
+                                  preprocessed=preprocessed,
+                                  ):
+        if varid.name != name:
+            continue
+        if local:
+            if varid.funcname:
+                if varid.funcname == UNKNOWN:
+                    raise NotImplementedError
+                return varid, decl
+        elif not varid.funcname:
+            return varid, decl
+    else:
+        return None, None  # No matching variable was found.
+
+
+def variable_from_id(id, filenames, *,
+                     perfilecache=None,
+                     preprocessed=False,
+                     handle_id=None,
+                     _get_var=variable,
+                     ):
+    """Return (varid, decl) for the first found variable that matches."""
+    local = False
+    if isinstance(id, str):
+        name = id
+    else:
+        if id.funcname == UNKNOWN:
+            local = True
+        elif id.funcname:
+            raise NotImplementedError
+
+        name = id.name
+        if id.filename and id.filename != UNKNOWN:
+            filenames = [id.filename]
+    return _get_var(name, filenames,
+                    local=local,
+                    perfilecache=perfilecache,
+                    preprocessed=preprocessed,
+                    handle_id=handle_id,
+                    )
similarity index 80%
rename from Tools/c-analyzer/c_parser/naive.py
rename to Tools/c-analyzer/c_analyzer/parser/naive.py
index 160f96c279e26160939c5372210114c8997880e4..4a4822d84ff54d9264617e211b9f765c88285527 100644 (file)
@@ -1,8 +1,7 @@
 import re
 
-from c_analyzer_common.info import UNKNOWN
+from ..common.info import UNKNOWN, ID
 
-from .info import Variable
 from .preprocessor import _iter_clean_lines
 
 
@@ -55,7 +54,7 @@ def parse_variable_declaration(srcline):
 
 
 def parse_variable(srcline, funcname=None):
-    """Return a Variable for the variable declared on the line (or None)."""
+    """Return (varid, decl) for the variable declared on the line (or None)."""
     line = srcline.strip()
 
     # XXX Handle more than just static variables.
@@ -74,7 +73,7 @@ def iter_variables(filename, *,
                    _get_srclines=get_srclines,
                    _default_parse_variable=parse_variable,
                    ):
-    """Yield a Variable for each in the given source file."""
+    """Yield (varid, decl) for each variable in the given source file."""
     if parse_variable is None:
         parse_variable = _default_parse_variable
 
@@ -99,13 +98,13 @@ def iter_variables(filename, *,
         info = parse_variable(line, funcname)
         if isinstance(info, list):
             for name, _funcname, decl in info:
-                yield Variable.from_parts(filename, _funcname, name, decl)
+                yield ID(filename, _funcname, name), decl
             continue
         name, decl = info
 
         if name is None:
             continue
-        yield Variable.from_parts(filename, funcname, name, decl)
+        yield ID(filename, funcname, name), decl
 
 
 def _match_varid(variable, name, funcname, ignored=None):
@@ -134,12 +133,12 @@ def find_variable(filename, funcname, name, *,
 
     Return None if the variable is not found.
     """
-    for variable in _iter_variables(filename,
+    for varid, decl in _iter_variables(filename,
                                     srccache=srccache,
                                     parse_variable=parse_variable,
                                     ):
-        if _match_varid(variable, name, funcname, ignored):
-            return variable
+        if _match_varid(varid, name, funcname, ignored):
+            return varid, decl
     else:
         return None
 
@@ -149,10 +148,10 @@ def find_variables(varids, filenames=None, *,
                    parse_variable=None,
                    _find_symbol=find_variable,
                    ):
-    """Yield a Variable for each ID.
+    """Yield (varid, decl) for each ID.
 
     If the variable is not found then its decl will be UNKNOWN.  That
-    way there will be one resulting Variable per given ID.
+    way there will be one resulting variable per given ID.
     """
     if srccache is _NOT_SET:
         srccache = {}
@@ -163,18 +162,18 @@ def find_variables(varids, filenames=None, *,
             srcfiles = [varid.filename]
         else:
             if not filenames:
-                yield Variable(varid, UNKNOWN, UNKNOWN)
+                yield varid, UNKNOWN
                 continue
             srcfiles = filenames
         for filename in srcfiles:
-            found = _find_varid(filename, varid.funcname, varid.name,
-                                 ignored=used,
-                                 srccache=srccache,
-                                 parse_variable=parse_variable,
-                                 )
-            if found:
-                yield found
-                used.add(found)
+            varid, decl = _find_varid(filename, varid.funcname, varid.name,
+                                      ignored=used,
+                                      srccache=srccache,
+                                      parse_variable=parse_variable,
+                                      )
+            if varid:
+                yield varid, decl
+                used.add(varid)
                 break
         else:
-            yield Variable(varid, UNKNOWN, UNKNOWN)
+            yield varid, UNKNOWN
similarity index 99%
rename from Tools/c-analyzer/c_parser/preprocessor.py
rename to Tools/c-analyzer/c_analyzer/parser/preprocessor.py
index 0e2866e4873e678648fa1ed98a09edbae784e92b..41f306e5f8022bd142ba7f0edaafde1193b8afc1 100644 (file)
@@ -3,8 +3,7 @@ import shlex
 import os
 import re
 
-from c_analyzer_common import util
-from . import info
+from ..common import util, info
 
 
 CONTINUATION = '\\' + os.linesep
diff --git a/Tools/c-analyzer/c_analyzer/symbols/__init__.py b/Tools/c-analyzer/c_analyzer/symbols/__init__.py
new file mode 100644 (file)
index 0000000..e69de29
similarity index 50%
rename from Tools/c-analyzer/c_symbols/binary.py
rename to Tools/c-analyzer/c_analyzer/symbols/_nm.py
index e125dbd5b5edc53703976cf734843124c43022ad..f3a75a6d4ba8241900e94a8c8f52df7327ebc3a8 100644 (file)
@@ -1,46 +1,24 @@
-import os
 import os.path
 import shutil
-import sys
-
-from c_analyzer_common import util, info
-from . import source
-from .info import Symbol
-
-
-#PYTHON = os.path.join(REPO_ROOT, 'python')
-PYTHON = sys.executable
 
+from c_analyzer.common import util, info
 
-def iter_symbols(binary=PYTHON, dirnames=None, *,
-                 # Alternately, use look_up_known_symbol()
-                 # from c_globals.supported.
-                 find_local_symbol=source.find_symbol,
-                 _file_exists=os.path.exists,
-                 _iter_symbols_nm=(lambda b, *a: _iter_symbols_nm(b, *a)),
-                 ):
-    """Yield a Symbol for each symbol found in the binary."""
-    if not _file_exists(binary):
-        raise Exception('executable missing (need to build it first?)')
-
-    if find_local_symbol:
-        cache = {}
-        def find_local_symbol(name, *, _find=find_local_symbol):
-            return _find(name, dirnames, _perfilecache=cache)
-    else:
-        find_local_symbol = None
+from .info import Symbol
 
-    if os.name == 'nt':
-        # XXX Support this.
-        raise NotImplementedError
-    else:
-        yield from _iter_symbols_nm(binary, find_local_symbol)
 
+# XXX need tests:
+# * iter_symbols
 
-#############################
-# binary format (e.g. ELF)
+NM_KINDS = {
+        'b': Symbol.KIND.VARIABLE,  # uninitialized
+        'd': Symbol.KIND.VARIABLE,  # initialized
+        #'g': Symbol.KIND.VARIABLE,  # uninitialized
+        #'s': Symbol.KIND.VARIABLE,  # initialized
+        't': Symbol.KIND.FUNCTION,
+        }
 
 SPECIAL_SYMBOLS = {
+        # binary format (e.g. ELF)
         '__bss_start',
         '__data_start',
         '__dso_handle',
@@ -63,29 +41,23 @@ def _is_special_symbol(name):
     return False
 
 
-#############################
-# "nm"
-
-NM_KINDS = {
-        'b': Symbol.KIND.VARIABLE,  # uninitialized
-        'd': Symbol.KIND.VARIABLE,  # initialized
-        #'g': Symbol.KIND.VARIABLE,  # uninitialized
-        #'s': Symbol.KIND.VARIABLE,  # initialized
-        't': Symbol.KIND.FUNCTION,
-        }
-
+def iter_symbols(binfile, *,
+                 nm=None,
+                 handle_id=None,
+                 _which=shutil.which,
+                 _run=util.run_cmd,
+                 ):
+    """Yield a Symbol for each relevant entry reported by the "nm" command."""
+    if nm is None:
+        nm = _which('nm')
+        if not nm:
+            raise NotImplementedError
+    if handle_id is None:
+        handle_id = info.ID
 
-def _iter_symbols_nm(binary, find_local_symbol=None,
-                     *,
-                     _which=shutil.which,
-                     _run=util.run_cmd,
-                     ):
-    nm = _which('nm')
-    if not nm:
-        raise NotImplementedError
     argv = [nm,
             '--line-numbers',
-            binary,
+            binfile,
             ]
     try:
         output = _run(argv)
@@ -95,23 +67,20 @@ def _iter_symbols_nm(binary, find_local_symbol=None,
             raise NotImplementedError
         raise
     for line in output.splitlines():
-        (name, kind, external, filename, funcname, vartype,
-         ) = _parse_nm_line(line,
-                            _find_local_symbol=find_local_symbol,
-                            )
+        (name, kind, external, filename, funcname,
+         ) = _parse_nm_line(line)
         if kind != Symbol.KIND.VARIABLE:
             continue
         elif _is_special_symbol(name):
             continue
-        assert vartype is None
         yield Symbol(
-                id=(filename, funcname, name),
+                id=handle_id(filename, funcname, name),
                 kind=kind,
                 external=external,
                 )
 
 
-def _parse_nm_line(line, *, _find_local_symbol=None):
+def _parse_nm_line(line):
     _origline = line
     _, _, line = line.partition(' ')  # strip off the address
     line = line.strip()
@@ -128,18 +97,9 @@ def _parse_nm_line(line, *, _find_local_symbol=None):
     else:
         filename = info.UNKNOWN
 
-    vartype = None
     name, islocal = _parse_nm_name(name, kind)
-    if islocal:
-        funcname = info.UNKNOWN
-        if _find_local_symbol is not None:
-            filename, funcname, vartype = _find_local_symbol(name)
-            filename = filename or info.UNKNOWN
-            funcname = funcname or info.UNKNOWN
-    else:
-        funcname = None
-        # XXX fine filename and vartype?
-    return name, kind, external, filename, funcname, vartype
+    funcname = info.UNKNOWN if islocal else None
+    return name, kind, external, filename, funcname
 
 
 def _parse_nm_name(name, kind):
diff --git a/Tools/c-analyzer/c_analyzer/symbols/find.py b/Tools/c-analyzer/c_analyzer/symbols/find.py
new file mode 100644 (file)
index 0000000..8564652
--- /dev/null
@@ -0,0 +1,175 @@
+import os
+import os.path
+import shutil
+
+from ..common import files
+from ..common.info import UNKNOWN, ID
+from ..parser import find as p_find
+
+from . import _nm
+from .info import Symbol
+
+# XXX need tests:
+# * get_resolver()
+# * get_resolver_from_dirs()
+# * symbol()
+# * symbols()
+# * variables()
+
+
+def _resolve_known(symbol, knownvars):
+    """Pop and return the first known variable the symbol matches.
+
+    "knownvars" is a mutable mapping keyed by variable ID.  The matched
+    entry is removed from it.  None is returned if nothing matches.
+    """
+    for varid in knownvars:
+        if symbol.match(varid):
+            # Returning right away means the mapping is never iterated
+            # again after being modified.
+            return knownvars.pop(varid)
+    return None
+
+
+def get_resolver(filenames=None, known=None, *,
+                 handle_var,
+                 check_filename=None,
+                 perfilecache=None,
+                 preprocessed=False,
+                 _from_source=p_find.variable_from_id,
+                 ):
+    """Return a "resolver" func for the given known vars/types and filenames.
+
+    "filenames" is an iterable of source files to search.  It is copied
+    into a list up front so that every call to the resolver sees all of
+    the files, even if a one-shot iterator was passed in.
+
+    "known" is a mapping; only its "variables" entry (a mapping of ID
+    to Variable) is used.  A private copy is made because resolved
+    entries are popped from it.
+
+    "handle_var" is a callable that takes (ID, decl) and returns a
+    Variable.  Variable.from_id is a suitable callable.
+
+    "check_filename", if given, is called with the symbol's filename
+    and decides whether the symbol is considered at all.  It is only
+    consulted when both known variables and filenames are available;
+    by default the filename must be one of "filenames".
+
+    The returned func takes a single Symbol and returns a corresponding
+    Variable.  If the symbol was located then the variable will be
+    valid, populated with the corresponding information.  Otherwise None
+    is returned.
+    """
+    filenames = list(filenames or ())
+    knownvars = (known or {}).get('variables')
+    if knownvars:
+        knownvars = dict(knownvars)  # a copy
+
+    def resolve_from_source(symbol):
+        varid, decl = _from_source(symbol, filenames,
+                                   perfilecache=perfilecache,
+                                   preprocessed=preprocessed,
+                                   )
+        if varid is None:
+            # Nothing in the source matched, so there is no variable
+            # to build.  (handle_var() is not expected to cope with
+            # a missing ID.)
+            return None
+        return handle_var(varid, decl)
+
+    if knownvars:
+        if filenames:
+            if check_filename is None:
+                def check_filename(filename):
+                    return filename in filenames
+            def resolve(symbol):
+                # XXX Check "found" instead?
+                if not check_filename(symbol.filename):
+                    return None
+                found = _resolve_known(symbol, knownvars)
+                if found is None:
+                    # Not a known variable, so fall back to the source.
+                    found = resolve_from_source(symbol)
+                return found
+        else:
+            def resolve(symbol):
+                return _resolve_known(symbol, knownvars)
+    elif filenames:
+        resolve = resolve_from_source
+    else:
+        def resolve(symbol):
+            return None
+    return resolve
+
+
+def get_resolver_from_dirs(dirnames, known=None, *,
+                           handle_var,
+                           suffixes=('.c',),
+                           perfilecache=None,
+                           preprocessed=False,
+                           _iter_files=files.iter_files_by_suffix,
+                           _get_resolver=get_resolver,
+                           ):
+    """Return a "resolver" func for the files under the given directories.
+
+    The files to search are those found under "dirnames" with one of
+    the given "suffixes".  A symbol's filename is accepted if it starts
+    with one of the directory names; each name is given a trailing path
+    separator first so that only whole path components match.
+
+    "dirnames" should be absolute paths.
+    NOTE(review): nothing here resolves relative names against CWD;
+    presumably that is left to the file iteration -- confirm.
+
+    See get_resolver().
+    """
+    sep = os.path.sep
+    prefixes = []
+    for dirname in dirnames:
+        if not dirname.endswith(sep):
+            dirname += sep
+        prefixes.append(dirname)
+
+    def check_filename(filename):
+        return any(filename.startswith(prefix) for prefix in prefixes)
+
+    return _get_resolver(_iter_files(prefixes, suffixes), known,
+                         handle_var=handle_var,
+                         check_filename=check_filename,
+                         perfilecache=perfilecache,
+                         preprocessed=preprocessed,
+                         )
+
+
+def symbol(symbol, filenames, known=None, *,
+           perfilecache=None,
+           preprocessed=False,
+           handle_id=None,
+           handle_var=None,
+           _get_resolver=get_resolver,
+           ):
+    """Return a Variable for the one matching the given symbol.
+
+    "symbol" can be one of several objects:
+
+    * Symbol - use the contained info
+    * name (str) - look for a global variable with that name
+    * (filename, name) - look for named global in file
+    * (filename, funcname, name) - look for named local in file
+
+    NOTE(review): the value is handed to the resolver as-is; whether
+    the tuple forms are understood depends on the resolver -- confirm.
+
+    A name is always required.  If the filename is None, "", or
+    "UNKNOWN" then all files will be searched.  If the funcname is
+    "" or "UNKNOWN" then only local variables will be searched for.
+
+    "handle_var" takes (ID, decl) and returns a Variable.  It defaults
+    to Variable.from_id.  "handle_id" is accepted for compatibility
+    only; the resolver factory has no such parameter, so it is not
+    passed on.
+
+    None is returned if the resolver finds no match.
+    """
+    if handle_var is None:
+        # Imported here rather than at the top of the module because
+        # the "variables" package imports this module.
+        from ..variables.info import Variable
+        handle_var = Variable.from_id
+    # The resolver factory takes (filenames, known), in that order,
+    # and requires "handle_var".
+    resolve = _get_resolver(filenames, known,
+                            handle_var=handle_var,
+                            perfilecache=perfilecache,
+                            preprocessed=preprocessed,
+                            )
+    return resolve(symbol)
+
+
+def _get_platform_tool():
+    """Return a func(binfile, handle_id) that iterates a binary's symbols.
+
+    Only the "nm" command is supported.  NotImplementedError is raised
+    on Windows or if "nm" cannot be found on the PATH.
+    """
+    if os.name == 'nt':
+        # XXX Support this.
+        raise NotImplementedError
+    nm = shutil.which('nm')
+    if not nm:
+        raise NotImplementedError
+
+    def iter_symbols(binfile, handle_id):
+        return _nm.iter_symbols(binfile, nm=nm, handle_id=handle_id)
+    return iter_symbols
+
+
+def symbols(binfile, *,
+            handle_id=None,
+            _file_exists=os.path.exists,
+            _get_platform_tool=_get_platform_tool,
+            ):
+    """Yield a Symbol for each one found in the binary.
+
+    This is a generator, so the check that "binfile" exists (and the
+    lookup of the platform tool) happens when iteration starts.
+    """
+    if _file_exists(binfile):
+        find_symbols = _get_platform_tool()
+        yield from find_symbols(binfile, handle_id)
+    else:
+        raise Exception('executable missing (need to build it first?)')
+
+
+def variables(binfile, *,
+              resolve,
+              handle_id=None,
+              _iter_symbols=symbols,
+              ):
+    """Yield (Variable, Symbol) for each variable symbol in the binary.
+
+    "resolve" is called with each symbol.  If it returns a false value
+    then None is yielded in place of the variable.
+    """
+    for sym in _iter_symbols(binfile, handle_id=handle_id):
+        if sym.kind == Symbol.KIND.VARIABLE:
+            resolved = resolve(sym)
+            yield (resolved if resolved else None), sym
similarity index 93%
rename from Tools/c-analyzer/c_symbols/info.py
rename to Tools/c-analyzer/c_analyzer/symbols/info.py
index f6ed52c8f071421826d474df3967d1595015fe3b..96a251abb7c7fdfe625b9eed2cba3f65d768d36d 100644 (file)
@@ -1,7 +1,7 @@
 from collections import namedtuple
 
-from c_analyzer_common.info import ID
-from c_analyzer_common.util import classonly, _NTBase
+from c_analyzer.common.info import ID
+from c_analyzer.common.util import classonly, _NTBase
 
 
 class Symbol(_NTBase, namedtuple('Symbol', 'id kind external')):
diff --git a/Tools/c-analyzer/c_analyzer/variables/__init__.py b/Tools/c-analyzer/c_analyzer/variables/__init__.py
new file mode 100644 (file)
index 0000000..e69de29
diff --git a/Tools/c-analyzer/c_analyzer/variables/find.py b/Tools/c-analyzer/c_analyzer/variables/find.py
new file mode 100644 (file)
index 0000000..3fe7284
--- /dev/null
@@ -0,0 +1,75 @@
+from ..common import files
+from ..common.info import UNKNOWN
+from ..parser import (
+        find as p_find,
+        )
+from ..symbols import (
+        info as s_info,
+        find as s_find,
+        )
+from .info import Variable
+
+# XXX need tests:
+# * vars_from_source
+
+
+def _remove_cached(cache, var):
+    """Drop "var" from its file's entry in the per-file cache.
+
+    "cache" maps a filename to a container of cached items.  Nothing
+    happens if the cache is empty (or None), if the file has no entry,
+    or if "var" is not in the entry.
+    """
+    if not cache:
+        return
+    try:
+        cached = cache[var.filename]
+    except KeyError:
+        return
+    try:
+        cached.remove(var)
+    except (KeyError, ValueError, IndexError):
+        # A missing item is reported as ValueError by list.remove()
+        # and as KeyError by set.remove().  Either way there is
+        # nothing left to drop.
+        pass
+
+
+def vars_from_binary(binfile, *,
+                     known=None,
+                     filenames=None,
+                     handle_id=None,
+                     check_filename=None,
+                     handle_var=Variable.from_id,
+                     _iter_vars=s_find.variables,
+                     _get_symbol_resolver=s_find.get_resolver,
+                     ):
+    """Yield a Variable for each variable symbol found in the binary.
+
+    Each symbol is resolved using the given "known" variables and
+    "filenames".  A symbol that cannot be resolved still produces a
+    Variable, with UNKNOWN for both its storage and its type.
+    """
+    percache = {}
+    resolver = _get_symbol_resolver(filenames, known,
+                                    handle_var=handle_var,
+                                    check_filename=check_filename,
+                                    perfilecache=percache,
+                                    )
+    pairs = _iter_vars(binfile, resolve=resolver, handle_id=handle_id)
+    for var, sym in pairs:
+        if var is None:
+            # Unresolved: keep the symbol's ID so it still gets reported.
+            var = Variable(sym.id, UNKNOWN, UNKNOWN)
+        yield var
+        # Once used, a variable must not be matched again.
+        _remove_cached(percache, var)
+
+
+def vars_from_source(filenames, *,
+                     preprocessed=None,
+                     known=None,
+                     handle_id=None,
+                     handle_var=Variable.from_id,
+                     iter_vars=p_find.variables,
+                     ):
+    """Yield a Variable for each declaration found in the source files.
+
+    "iter_vars" is expected to yield (varid, decl) pairs for the given
+    files ("known", "preprocessed", and "handle_id" are passed through
+    to it).  "handle_var" turns each pair into a Variable.
+    """
+    percache = {}
+    found = iter_vars(filenames or (),
+                      perfilecache=percache,
+                      preprocessed=preprocessed,
+                      known=known,
+                      handle_id=handle_id,
+                      )
+    for varid, decl in found:
+        var = handle_var(varid, decl)
+        yield var
+        # Once used, a variable must not be matched again.
+        _remove_cached(percache, var)
similarity index 61%
rename from Tools/c-analyzer/c_parser/info.py
rename to Tools/c-analyzer/c_analyzer/variables/info.py
index a4e32d75eed73fc7ef536c008aa5fb5c120071ed..336a523c7a2dba05068cdf25e5fb0d91cc55c4fd 100644 (file)
@@ -1,8 +1,7 @@
 from collections import namedtuple
-import re
 
-from c_analyzer_common import info, util
-from c_analyzer_common.util import classonly, _NTBase
+from ..common.info import ID, UNKNOWN
+from ..common.util import classonly, _NTBase
 
 
 def normalize_vartype(vartype):
@@ -16,26 +15,7 @@ def normalize_vartype(vartype):
     return str(vartype)
 
 
-def extract_storage(decl, *, isfunc=False):
-    """Return (storage, vartype) based on the given declaration.
-
-    The default storage is "implicit" or "local".
-    """
-    if decl == info.UNKNOWN:
-        return decl, decl
-    if decl.startswith('static '):
-        return 'static', decl
-        #return 'static', decl.partition(' ')[2].strip()
-    elif decl.startswith('extern '):
-        return 'extern', decl
-        #return 'extern', decl.partition(' ')[2].strip()
-    elif re.match('.*\b(static|extern)\b', decl):
-        raise NotImplementedError
-    elif isfunc:
-        return 'local', decl
-    else:
-        return 'implicit', decl
-
+# XXX Variable.vartype -> decl (Declaration).
 
 class Variable(_NTBase,
                namedtuple('Variable', 'id storage vartype')):
@@ -52,16 +32,23 @@ class Variable(_NTBase,
 
     @classonly
     def from_parts(cls, filename, funcname, name, decl, storage=None):
+        varid = ID(filename, funcname, name)
         if storage is None:
-            storage, decl = extract_storage(decl, isfunc=funcname)
-        id = info.ID(filename, funcname, name)
-        self = cls(id, storage, decl)
+            self = cls.from_id(varid, decl)
+        else:
+            self = cls(varid, storage, decl)
         return self
 
+    @classonly
+    def from_id(cls, varid, decl):
+        from ..parser.declarations import extract_storage
+        storage = extract_storage(decl, infunc=varid.funcname)
+        return cls(varid, storage, decl)
+
     def __new__(cls, id, storage, vartype):
         self = super().__new__(
                 cls,
-                id=info.ID.from_raw(id),
+                id=ID.from_raw(id),
                 storage=str(storage) if storage else None,
                 vartype=normalize_vartype(vartype) if vartype else None,
                 )
@@ -77,10 +64,10 @@ class Variable(_NTBase,
         if not self.id:
             raise TypeError('missing id')
 
-        if not self.filename or self.filename == info.UNKNOWN:
+        if not self.filename or self.filename == UNKNOWN:
             raise TypeError(f'id missing filename ({self.id})')
 
-        if self.funcname and self.funcname == info.UNKNOWN:
+        if self.funcname and self.funcname == UNKNOWN:
             raise TypeError(f'id missing funcname ({self.id})')
 
         self.id.validate()
@@ -89,12 +76,12 @@ class Variable(_NTBase,
         """Fail if the object is invalid (i.e. init with bad data)."""
         self._validate_id()
 
-        if self.storage is None or self.storage == info.UNKNOWN:
+        if self.storage is None or self.storage == UNKNOWN:
             raise TypeError('missing storage')
         elif self.storage not in self.STORAGE:
             raise ValueError(f'unsupported storage {self.storage:r}')
 
-        if self.vartype is None or self.vartype == info.UNKNOWN:
+        if self.vartype is None or self.vartype == UNKNOWN:
             raise TypeError('missing vartype')
 
     @property
diff --git a/Tools/c-analyzer/c_analyzer/variables/known.py b/Tools/c-analyzer/c_analyzer/variables/known.py
new file mode 100644 (file)
index 0000000..aa2934a
--- /dev/null
@@ -0,0 +1,91 @@
+import csv
+
+from ..common.info import ID, UNKNOWN
+from ..common.util import read_tsv
+from .info import Variable
+
+
+# XXX need tests:
+# * read_file()
+# * look_up_variable()
+
+
+# The columns of the "known" data file, in order.  HEADER is the
+# tab-joined header line that read_file() hands to the TSV reader.
+COLUMNS = ('filename', 'funcname', 'name', 'kind', 'declaration')
+HEADER = '\t'.join(COLUMNS)
+
+
+def read_file(infile, *,
+              _read_tsv=read_tsv,
+              ):
+    """Yield (kind, id, decl) for each row in the data file.
+
+    An empty funcname column, or one holding just "-", is turned into
+    None on the resulting ID.
+
+    The caller is responsible for validating each row.
+    """
+    rows = _read_tsv(infile, HEADER)
+    for filename, funcname, name, kind, declaration in rows:
+        if funcname == '-' or not funcname:
+            funcname = None
+        yield kind, ID(filename, funcname, name), declaration
+
+
+def from_file(infile, *,
+              handle_var=Variable.from_id,
+              _read_file=read_file,
+              ):
+    """Return the info for known declarations in the given file.
+
+    The result is a dict whose "variables" entry maps each ID to the
+    value that "handle_var" built from (id, decl).  Each value is
+    validated before it is stored.
+
+    ValueError is raised for a row of any kind other than "variable".
+    """
+    known = {
+        'variables': {},
+        #'types': {},
+        #'constants': {},
+        #'macros': {},
+        }
+    for kind, varid, decl in _read_file(infile):
+        if kind == 'variable':
+            values = known['variables']
+            value = handle_var(varid, decl)
+        else:
+            # Report the parsed fields; the raw row is not available here.
+            raise ValueError(
+                    f'unsupported kind in row {(kind, varid, decl)}')
+        value.validate()
+        values[varid] = value
+    return known
+
+
+def look_up_variable(varid, knownvars, *,
+                     match_files=(lambda f1, f2: f1 == f2),
+                     ):
+    """Return the known Variable matching the given ID.
+
+    "knownvars" is a mapping of ID to Variable.
+
+    "match_files" is used to verify if two filenames point to
+    the same file.  It is called with the given ID's filename first
+    and the known ID's filename second.
+
+    If the funcname is UNKNOWN then the first known local variable with
+    the same name is returned; the file must match too, unless the
+    filename is missing or UNKNOWN.  Otherwise the ID is looked up
+    directly, which requires a usable filename (NotImplementedError is
+    raised without one).
+
+    If no match is found then None is returned.
+    """
+    if not knownvars:
+        return None
+
+    anyfile = not varid.filename or varid.filename == UNKNOWN
+    if varid.funcname == UNKNOWN:
+        # A separate loop name is used so that each known ID is
+        # compared against the requested one.
+        for knownid in knownvars:
+            if not knownid.funcname:
+                continue  # Not a local variable.
+            if not anyfile:
+                if not match_files(varid.filename, knownid.filename):
+                    continue
+            if knownid.name == varid.name:
+                return knownvars[knownid]
+        return None
+    elif anyfile:
+        raise NotImplementedError
+    else:
+        # The mapping is keyed by ID.  An object that wraps its ID in
+        # an "id" attribute is accepted as well.
+        return knownvars.get(getattr(varid, 'id', varid))
diff --git a/Tools/c-analyzer/c_analyzer_common/__init__.py b/Tools/c-analyzer/c_analyzer_common/__init__.py
deleted file mode 100644 (file)
index 888b16f..0000000
+++ /dev/null
@@ -1,19 +0,0 @@
-import os.path
-
-
-PKG_ROOT = os.path.dirname(__file__)
-DATA_DIR = os.path.dirname(PKG_ROOT)
-REPO_ROOT = os.path.dirname(
-        os.path.dirname(DATA_DIR))
-
-SOURCE_DIRS = [os.path.join(REPO_ROOT, name) for name in [
-        'Include',
-        'Python',
-        'Parser',
-        'Objects',
-        'Modules',
-        ]]
-
-
-# Clean up the namespace.
-del os
diff --git a/Tools/c-analyzer/c_analyzer_common/info.py b/Tools/c-analyzer/c_analyzer_common/info.py
deleted file mode 100644 (file)
index e217380..0000000
+++ /dev/null
@@ -1,69 +0,0 @@
-from collections import namedtuple
-import re
-
-from .util import classonly, _NTBase
-
-
-UNKNOWN = '???'
-
-NAME_RE = re.compile(r'^([a-zA-Z]|_\w*[a-zA-Z]\w*|[a-zA-Z]\w*)$')
-
-
-class ID(_NTBase, namedtuple('ID', 'filename funcname name')):
-    """A unique ID for a single symbol or declaration."""
-
-    __slots__ = ()
-    # XXX Add optional conditions (tuple of strings) field.
-    #conditions = Slot()
-
-    @classonly
-    def from_raw(cls, raw):
-        if not raw:
-            return None
-        if isinstance(raw, str):
-            return cls(None, None, raw)
-        try:
-            name, = raw
-            filename = None
-        except ValueError:
-            try:
-                filename, name = raw
-            except ValueError:
-                return super().from_raw(raw)
-        return cls(filename, None, name)
-
-    def __new__(cls, filename, funcname, name):
-        self = super().__new__(
-                cls,
-                filename=str(filename) if filename else None,
-                funcname=str(funcname) if funcname else None,
-                name=str(name) if name else None,
-                )
-        #cls.conditions.set(self, tuple(str(s) if s else None
-        #                               for s in conditions or ()))
-        return self
-
-    def validate(self):
-        """Fail if the object is invalid (i.e. init with bad data)."""
-        if not self.name:
-            raise TypeError('missing name')
-        else:
-            if not NAME_RE.match(self.name):
-                raise ValueError(
-                        f'name must be an identifier, got {self.name!r}')
-
-        # Symbols from a binary might not have filename/funcname info.
-
-        if self.funcname:
-            if not self.filename:
-                raise TypeError('missing filename')
-            if not NAME_RE.match(self.funcname) and self.funcname != UNKNOWN:
-                raise ValueError(
-                        f'name must be an identifier, got {self.funcname!r}')
-
-        # XXX Require the filename (at least UNKONWN)?
-        # XXX Check the filename?
-
-    @property
-    def islocal(self):
-        return self.funcname is not None
diff --git a/Tools/c-analyzer/c_analyzer_common/known.py b/Tools/c-analyzer/c_analyzer_common/known.py
deleted file mode 100644 (file)
index dec1e1d..0000000
+++ /dev/null
@@ -1,74 +0,0 @@
-import csv
-import os.path
-
-from c_parser.info import Variable
-
-from . import DATA_DIR
-from .info import ID, UNKNOWN
-from .util import read_tsv
-
-
-DATA_FILE = os.path.join(DATA_DIR, 'known.tsv')
-
-COLUMNS = ('filename', 'funcname', 'name', 'kind', 'declaration')
-HEADER = '\t'.join(COLUMNS)
-
-
-# XXX need tests:
-# * from_file()
-
-def from_file(infile, *,
-              _read_tsv=read_tsv,
-              ):
-    """Return the info for known declarations in the given file."""
-    known = {
-        'variables': {},
-        #'types': {},
-        #'constants': {},
-        #'macros': {},
-        }
-    for row in _read_tsv(infile, HEADER):
-        filename, funcname, name, kind, declaration = row
-        if not funcname or funcname == '-':
-            funcname = None
-        id = ID(filename, funcname, name)
-        if kind == 'variable':
-            values = known['variables']
-            if funcname:
-                storage = _get_storage(declaration) or 'local'
-            else:
-                storage = _get_storage(declaration) or 'implicit'
-            value = Variable(id, storage, declaration)
-        else:
-            raise ValueError(f'unsupported kind in row {row}')
-        value.validate()
-#        if value.name == 'id' and declaration == UNKNOWN:
-#            # None of these are variables.
-#            declaration = 'int id';
-#        else:
-#            value.validate()
-        values[id] = value
-    return known
-
-
-def _get_storage(decl):
-    # statics
-    if decl.startswith('static '):
-        return 'static'
-    if decl.startswith(('Py_LOCAL(', 'Py_LOCAL_INLINE(')):
-        return 'static'
-    if decl.startswith(('_Py_IDENTIFIER(', '_Py_static_string(')):
-        return 'static'
-    if decl.startswith('PyDoc_VAR('):
-        return 'static'
-    if decl.startswith(('SLOT1BINFULL(', 'SLOT1BIN(')):
-        return 'static'
-    if decl.startswith('WRAP_METHOD('):
-        return 'static'
-    # public extern
-    if decl.startswith('extern '):
-        return 'extern'
-    if decl.startswith('PyAPI_DATA('):
-        return 'extern'
-    # implicit or local
-    return None
diff --git a/Tools/c-analyzer/c_globals/find.py b/Tools/c-analyzer/c_globals/find.py
deleted file mode 100644 (file)
index a51b947..0000000
+++ /dev/null
@@ -1,95 +0,0 @@
-from c_analyzer_common import SOURCE_DIRS
-from c_analyzer_common.info import UNKNOWN
-from c_symbols import (
-        info as s_info,
-        binary as b_symbols,
-        source as s_symbols,
-        resolve,
-        )
-from c_parser import info, declarations
-
-
-# XXX needs tests:
-# * iter_variables
-
-def globals_from_binary(binfile=b_symbols.PYTHON, *,
-                        knownvars=None,
-                        dirnames=None,
-                        _iter_symbols=b_symbols.iter_symbols,
-                        _resolve=resolve.symbols_to_variables,
-                        _get_symbol_resolver=resolve.get_resolver,
-                        ):
-    """Yield a Variable for each found Symbol.
-
-    Details are filled in from the given "known" variables and types.
-    """
-    symbols = _iter_symbols(binfile, find_local_symbol=None)
-    #symbols = list(symbols)
-    for variable in _resolve(symbols,
-                             resolve=_get_symbol_resolver(knownvars, dirnames),
-                             ):
-        # Skip each non-global variable (unless we couldn't find it).
-        # XXX Drop the "UNKNOWN" condition?
-        if not variable.isglobal and variable.vartype != UNKNOWN:
-            continue
-        yield variable
-
-
-def globals_from_declarations(dirnames=SOURCE_DIRS, *,
-                              known=None,
-                              ):
-    """Yield a Variable for each found declaration.
-
-    Details are filled in from the given "known" variables and types.
-    """
-    raise NotImplementedError
-
-
-def iter_variables(kind='platform', *,
-                   known=None,
-                   dirnames=None,
-                   _resolve_symbols=resolve.symbols_to_variables,
-                   _get_symbol_resolver=resolve.get_resolver,
-                   _symbols_from_binary=b_symbols.iter_symbols,
-                   _symbols_from_source=s_symbols.iter_symbols,
-                   _iter_raw=declarations.iter_all,
-                   _iter_preprocessed=declarations.iter_preprocessed,
-                   ):
-    """Yield a Variable for each one found (e.g. in files)."""
-    kind = kind or 'platform'
-
-    if kind == 'symbols':
-        knownvars = (known or {}).get('variables')
-        yield from _resolve_symbols(
-                _symbols_from_source(dirnames, known),
-                resolve=_get_symbol_resolver(knownvars, dirnames),
-                )
-    elif kind == 'platform':
-        knownvars = (known or {}).get('variables')
-        yield from _resolve_symbols(
-                _symbols_from_binary(find_local_symbol=None),
-                resolve=_get_symbol_resolver(knownvars, dirnames),
-                )
-    elif kind == 'declarations':
-        for decl in _iter_raw(dirnames):
-            if not isinstance(decl, info.Variable):
-                continue
-            yield decl
-    elif kind == 'preprocessed':
-        for decl in _iter_preprocessed(dirnames):
-            if not isinstance(decl, info.Variable):
-                continue
-            yield decl
-    else:
-        raise ValueError(f'unsupported kind {kind!r}')
-
-
-def globals(dirnames, known, *,
-            kind=None,  # Use the default.
-            _iter_variables=iter_variables,
-            ):
-    """Return a list of (StaticVar, <supported>) for each found global var."""
-    for found in _iter_variables(kind, known=known, dirnames=dirnames):
-        if not found.isglobal:
-            continue
-        yield found
diff --git a/Tools/c-analyzer/c_globals/show.py b/Tools/c-analyzer/c_globals/show.py
deleted file mode 100644 (file)
index f4298b1..0000000
+++ /dev/null
@@ -1,16 +0,0 @@
-
-def basic(globals, *,
-          _print=print):
-    """Print each row simply."""
-    for variable in globals:
-        if variable.funcname:
-            line = f'{variable.filename}:{variable.funcname}():{variable.name}'
-        else:
-            line = f'{variable.filename}:{variable.name}'
-        vartype = variable.vartype
-        #if vartype.startswith('static '):
-        #    vartype = vartype.partition(' ')[2]
-        #else:
-        #    vartype = '=' + vartype
-        line = f'{line:<64} {vartype}'
-        _print(line)
diff --git a/Tools/c-analyzer/c_symbols/resolve.py b/Tools/c-analyzer/c_symbols/resolve.py
deleted file mode 100644 (file)
index 56210ce..0000000
+++ /dev/null
@@ -1,147 +0,0 @@
-import os.path
-
-from c_analyzer_common import files
-from c_analyzer_common.info import UNKNOWN
-from c_parser import declarations, info
-from .info import Symbol
-from .source import _find_symbol
-
-
-# XXX need tests:
-# * look_up_known_symbol()
-# * symbol_from_source()
-# * get_resolver()
-# * symbols_to_variables()
-
-def look_up_known_symbol(symbol, knownvars, *,
-                         match_files=(lambda f1, f2: f1 == f2),
-                         ):
-    """Return the known variable matching the given symbol.
-
-    "knownvars" is a mapping of common.ID to parser.Variable.
-
-    "match_files" is used to verify if two filenames point to
-    the same file.
-    """
-    if not knownvars:
-        return None
-
-    if symbol.funcname == UNKNOWN:
-        if not symbol.filename or symbol.filename == UNKNOWN:
-            for varid in knownvars:
-                if not varid.funcname:
-                    continue
-                if varid.name == symbol.name:
-                    return knownvars[varid]
-            else:
-                return None
-        else:
-            for varid in knownvars:
-                if not varid.funcname:
-                    continue
-                if not match_files(varid.filename, symbol.filename):
-                    continue
-                if varid.name == symbol.name:
-                    return knownvars[varid]
-            else:
-                return None
-    elif not symbol.filename or symbol.filename == UNKNOWN:
-        raise NotImplementedError
-    else:
-        return knownvars.get(symbol.id)
-
-
-def find_in_source(symbol, dirnames, *,
-                   _perfilecache={},
-                   _find_symbol=_find_symbol,
-                   _iter_files=files.iter_files_by_suffix,
-                   ):
-    """Return the Variable matching the given Symbol.
-
-    If there is no match then return None.
-    """
-    if symbol.filename and symbol.filename != UNKNOWN:
-        filenames = [symbol.filename]
-    else:
-        filenames = _iter_files(dirnames, ('.c', '.h'))
-
-    if symbol.funcname and symbol.funcname != UNKNOWN:
-        raise NotImplementedError
-
-    (filename, funcname, decl
-     ) = _find_symbol(symbol.name, filenames, _perfilecache)
-    if filename == UNKNOWN:
-        return None
-    return info.Variable.from_parts(filename, funcname, symbol.name, decl)
-
-
-def get_resolver(knownvars=None, dirnames=None, *,
-                 _look_up_known=look_up_known_symbol,
-                 _from_source=find_in_source,
-                 ):
-    """Return a "resolver" func for the given known vars and dirnames.
-
-    The func takes a single Symbol and returns a corresponding Variable.
-    If the symbol was located then the variable will be valid, populated
-    with the corresponding information.  Otherwise None is returned.
-    """
-    if knownvars:
-        knownvars = dict(knownvars)  # a copy
-        def resolve_known(symbol):
-            found = _look_up_known(symbol, knownvars)
-            if found is None:
-                return None
-            elif symbol.funcname == UNKNOWN:
-                knownvars.pop(found.id)
-            elif not symbol.filename or symbol.filename == UNKNOWN:
-                knownvars.pop(found.id)
-            return found
-        if dirnames:
-            def resolve(symbol):
-                found = resolve_known(symbol)
-                if found is None:
-                    return None
-                    #return _from_source(symbol, dirnames)
-                else:
-                    for dirname in dirnames:
-                        if not dirname.endswith(os.path.sep):
-                            dirname += os.path.sep
-                        if found.filename.startswith(dirname):
-                            break
-                    else:
-                        return None
-                    return found
-        else:
-            resolve = resolve_known
-    elif dirnames:
-        def resolve(symbol):
-            return _from_source(symbol, dirnames)
-    else:
-        def resolve(symbol):
-            return None
-    return resolve
-
-
-def symbols_to_variables(symbols, *,
-                         resolve=(lambda s: look_up_known_symbol(s, None)),
-                         ):
-    """Yield the variable the matches each given symbol.
-
-    Use get_resolver() for a "resolve" func to use.
-    """
-    for symbol in symbols:
-        if isinstance(symbol, info.Variable):
-            # XXX validate?
-            yield symbol
-            continue
-        if symbol.kind != Symbol.KIND.VARIABLE:
-            continue
-        resolved = resolve(symbol)
-        if resolved is None:
-            #raise NotImplementedError(symbol)
-            resolved = info.Variable(
-                    id=symbol.id,
-                    storage=UNKNOWN,
-                    vartype=UNKNOWN,
-                    )
-        yield resolved
diff --git a/Tools/c-analyzer/c_symbols/source.py b/Tools/c-analyzer/c_symbols/source.py
deleted file mode 100644 (file)
index a724810..0000000
+++ /dev/null
@@ -1,58 +0,0 @@
-from c_analyzer_common import files
-from c_analyzer_common.info import UNKNOWN
-from c_parser import declarations
-
-
-# XXX need tests:
-# * find_symbol()
-
-def find_symbol(name, dirnames, *,
-                _perfilecache,
-                _iter_files=files.iter_files_by_suffix,
-                **kwargs
-                ):
-    """Return (filename, funcname, vartype) for the matching Symbol."""
-    filenames = _iter_files(dirnames, ('.c', '.h'))
-    return _find_symbol(name, filenames, _perfilecache, **kwargs)
-
-
-def _get_symbols(filename, *,
-                 _iter_variables=declarations.iter_variables,
-                 ):
-    """Return the list of Symbols found in the given file."""
-    symbols = {}
-    for funcname, name, vartype in _iter_variables(filename):
-        if not funcname:
-            continue
-        try:
-            instances = symbols[name]
-        except KeyError:
-            instances = symbols[name] = []
-        instances.append((funcname, vartype))
-    return symbols
-
-
-def _find_symbol(name, filenames, _perfilecache, *,
-                _get_local_symbols=_get_symbols,
-                ):
-    for filename in filenames:
-        try:
-            symbols = _perfilecache[filename]
-        except KeyError:
-            symbols = _perfilecache[filename] = _get_local_symbols(filename)
-
-        try:
-            instances = symbols[name]
-        except KeyError:
-            continue
-
-        funcname, vartype = instances.pop(0)
-        if not instances:
-            symbols.pop(name)
-        return filename, funcname, vartype
-    else:
-        return UNKNOWN, UNKNOWN, UNKNOWN
-
-
-def iter_symbols():
-    raise NotImplementedError
diff --git a/Tools/c-analyzer/cpython/__init__.py b/Tools/c-analyzer/cpython/__init__.py
new file mode 100644 (file)
index 0000000..ae45b42
--- /dev/null
@@ -0,0 +1,29 @@
+import os.path
+import sys
+
+
+TOOL_ROOT = os.path.abspath(
+        os.path.dirname(  # c-analyzer/
+            os.path.dirname(__file__)))  # cpython/
+DATA_DIR = TOOL_ROOT
+REPO_ROOT = (
+        os.path.dirname(  # ..
+            os.path.dirname(TOOL_ROOT)))  # Tools/
+
+INCLUDE_DIRS = [os.path.join(REPO_ROOT, name) for name in [
+        'Include',
+        ]]
+SOURCE_DIRS = [os.path.join(REPO_ROOT, name) for name in [
+        'Python',
+        'Parser',
+        'Objects',
+        'Modules',
+        ]]
+
+#PYTHON = os.path.join(REPO_ROOT, 'python')
+PYTHON = sys.executable
+
+
+# Clean up the namespace.
+del sys
+del os
similarity index 68%
rename from Tools/c-analyzer/c_globals/__main__.py
rename to Tools/c-analyzer/cpython/__main__.py
index 9570fb6a14c4e69c4592e4d765c36f22722bffe4..6b0f9bcb9687fbfab440ec6f9b15fe42d9c89899 100644 (file)
@@ -1,42 +1,42 @@
 import argparse
-import os.path
 import re
 import sys
 
-from c_analyzer_common import SOURCE_DIRS, REPO_ROOT
-from c_analyzer_common.info import UNKNOWN
-from c_analyzer_common.known import (
+from c_analyzer.common import show
+from c_analyzer.common.info import UNKNOWN
+
+from . import SOURCE_DIRS
+from .find import supported_vars
+from .known import (
     from_file as known_from_file,
     DATA_FILE as KNOWN_FILE,
     )
-from . import find, show
-from .supported import is_supported, ignored_from_file, IGNORED_FILE, _is_object
+from .supported import IGNORED_FILE
 
 
-def _match_unused_global(variable, knownvars, used):
-    found = []
-    for varid in knownvars:
-        if varid in used:
-            continue
-        if varid.funcname is not None:
-            continue
-        if varid.name != variable.name:
-            continue
-        if variable.filename and variable.filename != UNKNOWN:
-            if variable.filename == varid.filename:
+def _check_results(unknown, knownvars, used):
+    def _match_unused_global(variable):
+        found = []
+        for varid in knownvars:
+            if varid in used:
+                continue
+            if varid.funcname is not None:
+                continue
+            if varid.name != variable.name:
+                continue
+            if variable.filename and variable.filename != UNKNOWN:
+                if variable.filename == varid.filename:
+                    found.append(varid)
+            else:
                 found.append(varid)
-        else:
-            found.append(varid)
-    return found
-
+        return found
 
-def _check_results(unknown, knownvars, used):
     badknown = set()
     for variable in sorted(unknown):
         msg = None
         if variable.funcname != UNKNOWN:
             msg = f'could not find global symbol {variable.id}'
-        elif m := _match_unused_global(variable, knownvars, used):
+        elif m := _match_unused_global(variable):
             assert isinstance(m, list)
             badknown.update(m)
         elif variable.name in ('completed', 'id'):  # XXX Figure out where these variables are.
@@ -65,32 +65,29 @@ def _check_results(unknown, knownvars, used):
         raise Exception('could not find all symbols')
 
 
-def _find_globals(dirnames, known, ignored):
-    if dirnames == SOURCE_DIRS:
-        dirnames = [os.path.relpath(d, REPO_ROOT) for d in dirnames]
-
-    ignored = ignored_from_file(ignored)
-    known = known_from_file(known)
+# XXX Move this check to its own command.
+def cmd_check_cache(cmd, *,
+                    known=KNOWN_FILE,
+                    ignored=IGNORED_FILE,
+                    _known_from_file=known_from_file,
+                    _find=supported_vars,
+                    ):
+    known = _known_from_file(known)
 
     used = set()
     unknown = set()
-    knownvars = (known or {}).get('variables')
-    for variable in find.globals_from_binary(knownvars=knownvars,
-                                             dirnames=dirnames):
-    #for variable in find.globals(dirnames, known, kind='platform'):
-        if variable.vartype == UNKNOWN:
-            unknown.add(variable)
+    for var, supported in _find(known=known, ignored=ignored):
+        if supported is None:
+            unknown.add(var)
             continue
-        yield variable, is_supported(variable, ignored, known)
-        used.add(variable.id)
-
-    #_check_results(unknown, knownvars, used)
+        used.add(var.id)
+    _check_results(unknown, known['variables'], used)
 
 
-def cmd_check(cmd, dirs=SOURCE_DIRS, *,
-              ignored=IGNORED_FILE,
+def cmd_check(cmd, *,
               known=KNOWN_FILE,
-              _find=_find_globals,
+              ignored=IGNORED_FILE,
+              _find=supported_vars,
               _show=show.basic,
               _print=print,
               ):
@@ -100,7 +97,11 @@ def cmd_check(cmd, dirs=SOURCE_DIRS, *,
     In the failure case, the list of unsupported variables
     will be printed out.
     """
-    unsupported = [v for v, s in _find(dirs, known, ignored) if not s]
+    unsupported = []
+    for var, supported in _find(known=known, ignored=ignored):
+        if not supported:
+            unsupported.append(var)
+
     if not unsupported:
         #_print('okay')
         return
@@ -112,11 +113,11 @@ def cmd_check(cmd, dirs=SOURCE_DIRS, *,
     sys.exit(1)
 
 
-def cmd_show(cmd, dirs=SOURCE_DIRS, *,
-             ignored=IGNORED_FILE,
+def cmd_show(cmd, *,
              known=KNOWN_FILE,
+             ignored=IGNORED_FILE,
              skip_objects=False,
-              _find=_find_globals,
+              _find=supported_vars,
              _show=show.basic,
              _print=print,
              ):
@@ -127,10 +128,12 @@ def cmd_show(cmd, dirs=SOURCE_DIRS, *,
     """
     allsupported = []
     allunsupported = []
-    for found, supported in _find(dirs, known, ignored):
-        if skip_objects:  # XXX Support proper filters instead.
-            if _is_object(found.vartype):
-                continue
+    for found, supported in _find(known=known,
+                                  ignored=ignored,
+                                  skip_objects=skip_objects,
+                                  ):
+        if supported is None:
+            continue
         (allsupported if supported else allunsupported
          ).append(found)
 
@@ -165,9 +168,9 @@ def parse_args(prog=PROG, argv=sys.argv[1:], *, _fail=None):
     common.add_argument('--known', metavar='FILE',
                         default=KNOWN_FILE,
                         help='path to file that lists known types')
-    common.add_argument('dirs', metavar='DIR', nargs='*',
-                        default=SOURCE_DIRS,
-                        help='a directory to check')
+    #common.add_argument('dirs', metavar='DIR', nargs='*',
+    #                    default=SOURCE_DIRS,
+    #                    help='a directory to check')
 
     parser = argparse.ArgumentParser(
             prog=prog,
similarity index 97%
rename from Tools/c-analyzer/c_analyzer_common/_generate.py
rename to Tools/c-analyzer/cpython/_generate.py
index 9b2fc9edb5c8240036f585046cdc360b32ea5fd9..4c340acf99e1c37c877082e445d33e2c06a7ec05 100644 (file)
@@ -1,15 +1,16 @@
 # The code here consists of hacks for pre-populating the known.tsv file.
 
-from c_parser.preprocessor import _iter_clean_lines
-from c_parser.naive import (
+from c_analyzer.parser.preprocessor import _iter_clean_lines
+from c_analyzer.parser.naive import (
         iter_variables, parse_variable_declaration, find_variables,
         )
-from c_parser.info import Variable
+from c_analyzer.common.known import HEADER as KNOWN_HEADER
+from c_analyzer.common.info import UNKNOWN, ID
+from c_analyzer.variables import Variable
+from c_analyzer.util import write_tsv
 
 from . import SOURCE_DIRS, REPO_ROOT
-from .known import DATA_FILE as KNOWN_FILE, HEADER as KNOWN_HEADER
-from .info import UNKNOWN, ID
-from .util import write_tsv
+from .known import DATA_FILE as KNOWN_FILE
 from .files import iter_cpython_files
 
 
diff --git a/Tools/c-analyzer/cpython/files.py b/Tools/c-analyzer/cpython/files.py
new file mode 100644 (file)
index 0000000..543097a
--- /dev/null
@@ -0,0 +1,29 @@
+from c_analyzer.common.files import (
+        C_SOURCE_SUFFIXES, walk_tree, iter_files_by_suffix,
+        )
+
+from . import SOURCE_DIRS, REPO_ROOT
+
+# XXX need tests:
+# * iter_files()
+
+
+def iter_files(*,
+               walk=walk_tree,
+               _files=iter_files_by_suffix,
+               ):
+    """Yield each file matching C_SOURCE_SUFFIXES found for SOURCE_DIRS.
+
+    Files inside an excluded tree (currently Include/cpython/) are skipped.
+    """
+    # This module has no top-level "import os", so import it locally.
+    import os.path
+    # The trailing '' makes join() end the prefix with a path separator.
+    excludedtrees = (
+        os.path.join('Include', 'cpython', ''),
+        )
+    for filename in _files(SOURCE_DIRS, C_SOURCE_SUFFIXES, REPO_ROOT,
+                           walk=walk,
+                           ):
+        if not filename.startswith(excludedtrees):
+            yield filename
diff --git a/Tools/c-analyzer/cpython/find.py b/Tools/c-analyzer/cpython/find.py
new file mode 100644 (file)
index 0000000..a7bc0b4
--- /dev/null
@@ -0,0 +1,101 @@
+import os.path
+
+from c_analyzer.common import files
+from c_analyzer.common.info import UNKNOWN, ID
+from c_analyzer.variables import find as _common
+
+from . import SOURCE_DIRS, PYTHON, REPO_ROOT
+from .known import (
+    from_file as known_from_file,
+    DATA_FILE as KNOWN_FILE,
+    )
+from .supported import (
+        ignored_from_file, IGNORED_FILE, is_supported, _is_object,
+        )
+
+# XXX need tests:
+# * vars_from_binary()
+# * vars_from_source()
+# * supported_vars()
+
+
+def _handle_id(filename, funcname, name, *,
+               _relpath=os.path.relpath,
+               ):
+    filename = _relpath(filename, REPO_ROOT)
+    return ID(filename, funcname, name)
+
+
+def vars_from_binary(*,
+                     known=KNOWN_FILE,
+                     _known_from_file=known_from_file,
+                     _iter_files=files.iter_files_by_suffix,
+                     _iter_vars=_common.vars_from_binary,
+                     ):
+    """Yield a Variable for each found Symbol.
+
+    Details are filled in from the given "known" variables and types.
+    """
+    if isinstance(known, str):
+        known = _known_from_file(known)
+    dirnames = SOURCE_DIRS
+    suffixes = ('.c',)
+    filenames = _iter_files(dirnames, suffixes)
+    # XXX For now we only use known variables (no source lookup).
+    filenames = None
+    yield from _iter_vars(PYTHON,
+                          known=known,
+                          filenames=filenames,
+                          handle_id=_handle_id,
+                          check_filename=(lambda n: True),
+                          )
+
+
+def vars_from_source(*,
+                     preprocessed=None,
+                     known=KNOWN_FILE,
+                     _known_from_file=known_from_file,
+                     _iter_files=files.iter_files_by_suffix,
+                     _iter_vars=_common.vars_from_source,
+                     ):
+    """Yield a Variable for each declaration in the raw source code.
+
+    Details are filled in from the given "known" variables and types.
+    """
+    if isinstance(known, str):
+        known = _known_from_file(known)
+    dirnames = SOURCE_DIRS
+    suffixes = ('.c',)
+    filenames = _iter_files(dirnames, suffixes)
+    yield from _iter_vars(filenames,
+                          preprocessed=preprocessed,
+                          known=known,
+                          handle_id=_handle_id,
+                          )
+
+
+def supported_vars(*,
+                   known=KNOWN_FILE,
+                   ignored=IGNORED_FILE,
+                   skip_objects=False,
+                   _known_from_file=known_from_file,
+                   _ignored_from_file=ignored_from_file,
+                   _iter_vars=vars_from_binary,
+                   _is_supported=is_supported,
+                   ):
+    """Yield (var, supported) for each global variable that is found.
+
+    "supported" is None when the variable's type is UNKNOWN.
+    """
+    if isinstance(known, str):
+        known = _known_from_file(known)
+    if isinstance(ignored, str):
+        ignored = _ignored_from_file(ignored)
+    for var in _iter_vars(known=known):
+        if not var.isglobal:
+            continue
+        if var.vartype == UNKNOWN:
+            yield var, None
+        # XXX Support proper filters instead.
+        elif not (skip_objects and _is_object(var.vartype)):
+            yield var, _is_supported(var, ignored, known)
diff --git a/Tools/c-analyzer/cpython/known.py b/Tools/c-analyzer/cpython/known.py
new file mode 100644 (file)
index 0000000..c3cc2c0
--- /dev/null
@@ -0,0 +1,66 @@
+import csv
+import os.path
+
+from c_analyzer.parser.declarations import extract_storage
+from c_analyzer.variables import known as _common
+from c_analyzer.variables.info import Variable
+
+from . import DATA_DIR
+
+
+# XXX need tests:
+# * from_file()
+# * look_up_variable()
+
+
+DATA_FILE = os.path.join(DATA_DIR, 'known.tsv')
+
+
+def _get_storage(decl, infunc):
+    """Return the storage for a declaration, recognizing CPython macros."""
+    # statics
+    static_macros = (
+        'Py_LOCAL(', 'Py_LOCAL_INLINE(',
+        '_Py_IDENTIFIER(', '_Py_static_string(',
+        'PyDoc_VAR(',
+        'SLOT1BINFULL(', 'SLOT1BIN(',
+        'WRAP_METHOD(',
+        )
+    if decl.startswith(static_macros):
+        return 'static'
+    # public extern
+    if decl.startswith('PyAPI_DATA('):
+        return 'extern'
+    # Fall back to the normal handler.
+    return extract_storage(decl, infunc=infunc)
+
+
+def _handle_var(varid, decl):
+#    if varid.name == 'id' and decl == UNKNOWN:
+#        # None of these are variables.
+#        decl = 'int id';
+    storage = _get_storage(decl, varid.funcname)
+    return Variable(varid, storage, decl)
+
+
+def from_file(infile=DATA_FILE, *,
+              _from_file=_common.from_file,
+              _handle_var=_handle_var,
+              ):
+    """Return the info for known declarations in the given file."""
+    return _from_file(infile, handle_var=_handle_var)
+
+
+def look_up_variable(varid, knownvars, *,
+                     _lookup=_common.look_up_variable,
+                     ):
+    """Return the known variable matching the given ID.
+
+    "knownvars" is a mapping of ID to Variable.
+
+    The lookup itself is delegated to "_lookup", which is called
+    with just (varid, knownvars).
+
+    If no match is found then None is returned.
+    """
+    return _lookup(varid, knownvars)
similarity index 97%
rename from Tools/c-analyzer/c_globals/supported.py
rename to Tools/c-analyzer/cpython/supported.py
index d185daa2463bb2747f0daf423dc93c979e6b0424..18786eefd8dedcc45087debf00a0186ba8da647a 100644 (file)
@@ -1,9 +1,13 @@
 import os.path
 import re
 
-from c_analyzer_common import DATA_DIR
-from c_analyzer_common.info import ID
-from c_analyzer_common.util import read_tsv, write_tsv
+from c_analyzer.common.info import ID
+from c_analyzer.common.util import read_tsv, write_tsv
+
+from . import DATA_DIR
+
+# XXX need tests:
+# * generate / script
 
 
 IGNORED_FILE = os.path.join(DATA_DIR, 'ignored.tsv')
@@ -379,11 +383,12 @@ def _generate_ignored_file(variables, filename=None, *,
 
 
 if __name__ == '__main__':
-    from c_analyzer_common import SOURCE_DIRS
-    from c_analyzer_common.known import (
+    from cpython import SOURCE_DIRS
+    from cpython.known import (
         from_file as known_from_file,
         DATA_FILE as KNOWN_FILE,
         )
+    # XXX This is wrong!
     from . import find
     known = known_from_file(KNOWN_FILE)
     knownvars = (known or {}).get('variables')