From f816d16553db7037c5c0a7e6e069e4052b2bb8e9 Mon Sep 17 00:00:00 2001 From: Russell Gallop Date: Thu, 2 Apr 2015 15:01:53 +0000 Subject: [PATCH] [utils] Add Check Compile Flow Consistency tool (check_cfc.py). This is a tool for checking consistency of code generation with different compiler options (such as -g or outputting to .s). This tool has found a number of code generation issues. The script acts as a wrapper to clang or clang++ performing 2 (or more) compiles then comparing the object files. Instructions for use are in check_cfc.py including how to use with LNT. Differential Revision: http://reviews.llvm.org/D8723 git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@233919 91177308-0d34-0410-b5e6-96231b3b80d8 --- utils/check_cfc/check_cfc.cfg | 3 + utils/check_cfc/check_cfc.py | 388 ++++++++++++++++++++++++++++++ utils/check_cfc/obj_diff.py | 79 ++++++ utils/check_cfc/setup.py | 21 ++ utils/check_cfc/test_check_cfc.py | 158 ++++++++++++ 5 files changed, 649 insertions(+) create mode 100644 utils/check_cfc/check_cfc.cfg create mode 100755 utils/check_cfc/check_cfc.py create mode 100755 utils/check_cfc/obj_diff.py create mode 100644 utils/check_cfc/setup.py create mode 100755 utils/check_cfc/test_check_cfc.py diff --git a/utils/check_cfc/check_cfc.cfg b/utils/check_cfc/check_cfc.cfg new file mode 100644 index 0000000000..967623e713 --- /dev/null +++ b/utils/check_cfc/check_cfc.cfg @@ -0,0 +1,3 @@ +[Checks] +dash_g_no_change = true +dash_s_no_change = true diff --git a/utils/check_cfc/check_cfc.py b/utils/check_cfc/check_cfc.py new file mode 100755 index 0000000000..3def36eb62 --- /dev/null +++ b/utils/check_cfc/check_cfc.py @@ -0,0 +1,388 @@ +#!/usr/bin/env python2.7 + +"""Check CFC - Check Compile Flow Consistency + +This is a compiler wrapper for testing that code generation is consistent with +different compilation processes. It checks that code is not unduly affected by +compiler options or other changes which should not have side effects. 
+ +To use: +-Ensure that the compiler under test (i.e. clang, clang++) is on the PATH +-On Linux copy this script to the name of the compiler + e.g. cp check_cfc.py clang && cp check_cfc.py clang++ +-On Windows use setup.py to generate check_cfc.exe and copy that to clang.exe + and clang++.exe +-Enable the desired checks in check_cfc.cfg (in the same directory as the + wrapper) + e.g. +[Checks] +dash_g_no_change = true +dash_s_no_change = false + +-The wrapper can be run using its absolute path or added to PATH before the + compiler under test + e.g. export PATH=<path to check_cfc>:$PATH +-Compile as normal. The wrapper intercepts normal -c compiles and will return + non-zero if the check fails. + e.g. +$ clang -c test.cpp +Code difference detected with -g +--- /tmp/tmp5nv893.o ++++ /tmp/tmp6Vwjnc.o +@@ -1 +1 @@ +- 0: 48 8b 05 51 0b 20 00 mov 0x200b51(%rip),%rax ++ 0: 48 39 3d 51 0b 20 00 cmp %rdi,0x200b51(%rip) + +-To run LNT with Check CFC specify the absolute path to the wrapper to the --cc + and --cxx options + e.g. + lnt runtest nt --cc <path to check_cfc>/clang \\ + --cxx <path to check_cfc>/clang++ ... + +To add a new check: +-Create a new subclass of WrapperCheck +-Implement the perform_check() method. This should perform the alternate compile + and do the comparison. +-Add the new check to check_cfc.cfg. The check has the same name as the + subclass. 
+""" + +from __future__ import print_function + +import imp +import os +import platform +import shutil +import subprocess +import sys +import tempfile +import ConfigParser +import io + +import obj_diff + +def is_windows(): + """Returns True if running on Windows.""" + return platform.system() == 'Windows' + +class WrapperStepException(Exception): + """Exception type to be used when a step other than the original compile + fails.""" + def __init__(self, msg, stdout, stderr): + self.msg = msg + self.stdout = stdout + self.stderr = stderr + +class WrapperCheckException(Exception): + """Exception type to be used when a comparison check fails.""" + def __init__(self, msg): + self.msg = msg + +def main_is_frozen(): + """Returns True when running as a py2exe executable.""" + return (hasattr(sys, "frozen") or # new py2exe + hasattr(sys, "importers") or # old py2exe + imp.is_frozen("__main__")) # tools/freeze + +def get_main_dir(): + """Get the directory that the script or executable is located in.""" + if main_is_frozen(): + return os.path.dirname(sys.executable) + return os.path.dirname(sys.argv[0]) + +def remove_dir_from_path(path_var, directory): + """Remove the specified directory from path_var, a string representing + PATH""" + pathlist = path_var.split(os.pathsep) + norm_directory = os.path.normpath(os.path.normcase(directory)) + pathlist = filter(lambda x: os.path.normpath( + os.path.normcase(x)) != norm_directory, pathlist) + return os.pathsep.join(pathlist) + +def path_without_wrapper(): + """Returns the PATH variable modified to remove the path to this program.""" + scriptdir = get_main_dir() + path = os.environ['PATH'] + return remove_dir_from_path(path, scriptdir) + +def flip_dash_g(args): + """Search for -g in args. If it exists then return args without. 
If not then + add it.""" + if '-g' in args: + # Return args without any -g + return [x for x in args if x != '-g'] + else: + # No -g, add one + return args + ['-g'] + +def derive_output_file(args): + """Derive output file from the input file (if just one) or None + otherwise.""" + infile = get_input_file(args) + if infile is None: + return None + else: + return '{}.o'.format(os.path.splitext(infile)[0]) + +def get_output_file(args): + """Return the output file specified by this command or None if not + specified.""" + grabnext = False + for arg in args: + if grabnext: + return arg + if arg == '-o': + # Specified as a separate arg + grabnext = True + elif arg.startswith('-o'): + # Specified conjoined with -o + return arg[2:] + assert grabnext == False + + return None + +def is_output_specified(args): + """Return true is output file is specified in args.""" + return get_output_file(args) is not None + +def replace_output_file(args, new_name): + """Replaces the specified name of an output file with the specified name. + Assumes that the output file name is specified in the command line args.""" + replaceidx = None + attached = False + for idx, val in enumerate(args): + if val == '-o': + replaceidx = idx + 1 + attached = False + elif val.startswith('-o'): + replaceidx = idx + attached = True + + if replaceidx is None: + raise Exception + replacement = new_name + if attached == True: + replacement = '-o' + new_name + args[replaceidx] = replacement + return args + +def add_output_file(args, output_file): + """Append an output file to args, presuming not already specified.""" + return args + ['-o', output_file] + +def set_output_file(args, output_file): + """Set the output file within the arguments. 
Appends or replaces as + appropriate.""" + if is_output_specified(args): + args = replace_output_file(args, output_file) + else: + args = add_output_file(args, output_file) + return args + +gSrcFileSuffixes = ('.c', '.cpp', '.cxx', '.c++', '.cp', '.cc') + +def get_input_file(args): + """Return the input file string if it can be found (and there is only + one).""" + inputFiles = list() + for arg in args: + testarg = arg + quotes = ('"', "'") + while testarg.endswith(quotes): + testarg = testarg[:-1] + testarg = os.path.normcase(testarg) + + # Test if it is a source file + if testarg.endswith(gSrcFileSuffixes): + inputFiles.append(arg) + if len(inputFiles) == 1: + return inputFiles[0] + else: + return None + +def set_input_file(args, input_file): + """Replaces the input file with that specified.""" + infile = get_input_file(args) + if infile: + infile_idx = args.index(infile) + args[infile_idx] = input_file + return args + else: + # Could not find input file + assert False + +def is_normal_compile(args): + """Check if this is a normal compile which will output an object file rather + than a preprocess or link.""" + compile_step = '-c' in args + # Bitcode cannot be disassembled in the same way + bitcode = '-flto' in args or '-emit-llvm' in args + # Version and help are queries of the compiler and override -c if specified + query = '--version' in args or '--help' in args + # Check if the input is recognised as a source file (this may be too + # strong a restriction) + input_is_valid = bool(get_input_file(args)) + return compile_step and not bitcode and not query and input_is_valid + +def run_step(command, my_env, error_on_failure): + """Runs a step of the compilation. Reports failure as exception.""" + # Need to use shell=True on Windows as Popen won't use PATH otherwise. 
+ p = subprocess.Popen(command, stdout=subprocess.PIPE, + stderr=subprocess.PIPE, env=my_env, shell=is_windows()) + (stdout, stderr) = p.communicate() + if p.returncode != 0: + raise WrapperStepException(error_on_failure, stdout, stderr) + +def get_temp_file_name(suffix): + """Get a temporary file name with a particular suffix. Let the caller be + reponsible for deleting it.""" + tf = tempfile.NamedTemporaryFile(suffix=suffix, delete=False) + tf.close() + return tf.name + +class WrapperCheck(object): + """Base class for a check. Subclass this to add a check.""" + def __init__(self, output_file_a): + """Record the base output file that will be compared against.""" + self._output_file_a = output_file_a + + def perform_check(self, arguments, my_env): + """Override this to perform the modified compilation and required + checks.""" + raise NotImplementedError("Please Implement this method") + +class dash_g_no_change(WrapperCheck): + def perform_check(self, arguments, my_env): + """Check if different code is generated with/without the -g flag.""" + output_file_b = get_temp_file_name('.o') + + alternate_command = list(arguments) + alternate_command = flip_dash_g(alternate_command) + alternate_command = set_output_file(alternate_command, output_file_b) + run_step(alternate_command, my_env, "Error compiling with -g") + + # Compare disassembly (returns first diff if differs) + difference = obj_diff.compare_object_files(self._output_file_a, + output_file_b) + if difference: + raise WrapperCheckException( + "Code difference detected with -g\n{}".format(difference)) + + # Clean up temp file if comparison okay + os.remove(output_file_b) + +class dash_s_no_change(WrapperCheck): + def perform_check(self, arguments, my_env): + """Check if compiling to asm then assembling in separate steps results + in different code than compiling to object directly.""" + output_file_b = get_temp_file_name('.o') + + alternate_command = arguments + ['-via-file-asm'] + alternate_command = 
set_output_file(alternate_command, output_file_b) + run_step(alternate_command, my_env, + "Error compiling with -via-file-asm") + + # Compare disassembly (returns first diff if differs) + difference = obj_diff.compare_object_files(self._output_file_a, + output_file_b) + if difference: + raise WrapperCheckException( + "Code difference detected with -S\n{}".format(difference)) + + # Clean up temp file if comparison okay + os.remove(output_file_b) + +if __name__ == '__main__': + # Create configuration defaults from list of checks + default_config = """ +[Checks] +""" + + # Find all subclasses of WrapperCheck + checks = [cls.__name__ for cls in vars()['WrapperCheck'].__subclasses__()] + + for c in checks: + default_config += "{} = false\n".format(c) + + config = ConfigParser.RawConfigParser() + config.readfp(io.BytesIO(default_config)) + scriptdir = get_main_dir() + config_path = os.path.join(scriptdir, 'check_cfc.cfg') + try: + config.read(os.path.join(config_path)) + except: + print("Could not read config from {}, " + "using defaults.".format(config_path)) + + my_env = os.environ.copy() + my_env['PATH'] = path_without_wrapper() + + arguments_a = list(sys.argv) + + # Prevent infinite loop if called with absolute path. + arguments_a[0] = os.path.basename(arguments_a[0]) + + # Sanity check + enabled_checks = [check_name + for check_name in checks + if config.getboolean('Checks', check_name)] + checks_comma_separated = ', '.join(enabled_checks) + print("Check CFC, checking: {}".format(checks_comma_separated)) + + # A - original compilation + output_file_orig = get_output_file(arguments_a) + if output_file_orig is None: + output_file_orig = derive_output_file(arguments_a) + + p = subprocess.Popen(arguments_a, env=my_env, shell=is_windows()) + p.communicate() + if p.returncode != 0: + sys.exit(p.returncode) + + if not is_normal_compile(arguments_a) or output_file_orig is None: + # Bail out here if we can't apply checks in this case. + # Does not indicate an error. 
+ # Maybe not straight compilation (e.g. -S or --version or -flto) + # or maybe > 1 input files. + sys.exit(0) + + # Sometimes we generate files which have very long names which can't be + # read/disassembled. This will exit early if we can't find the file we + # expected to be output. + if not os.path.isfile(output_file_orig): + sys.exit(0) + + # Copy output file to a temp file + temp_output_file_orig = get_temp_file_name('.o') + shutil.copyfile(output_file_orig, temp_output_file_orig) + + # Run checks, if they are enabled in config and if they are appropriate for + # this command line. + current_module = sys.modules[__name__] + for check_name in checks: + if config.getboolean('Checks', check_name): + class_ = getattr(current_module, check_name) + checker = class_(temp_output_file_orig) + try: + checker.perform_check(arguments_a, my_env) + except WrapperCheckException as e: + # Check failure + print(e.msg, file=sys.stderr) + + # Remove file to comply with build system expectations (no + # output file if failed) + os.remove(output_file_orig) + sys.exit(1) + + except WrapperStepException as e: + # Compile step failure + print(e.msg, file=sys.stderr) + print("*** stdout ***", file=sys.stderr) + print(e.stdout, file=sys.stderr) + print("*** stderr ***", file=sys.stderr) + print(e.stderr, file=sys.stderr) + + # Remove file to comply with build system expectations (no + # output file if failed) + os.remove(output_file_orig) + sys.exit(1) diff --git a/utils/check_cfc/obj_diff.py b/utils/check_cfc/obj_diff.py new file mode 100755 index 0000000000..6f932b3172 --- /dev/null +++ b/utils/check_cfc/obj_diff.py @@ -0,0 +1,79 @@ +#!/usr/bin/env python2.7 + +from __future__ import print_function + +import argparse +import difflib +import os +import subprocess +import sys + +disassembler = 'objdump' + +def keep_line(line): + """Returns true for lines that should be compared in the disassembly + output.""" + return "file format" not in line + +def disassemble(objfile): + 
"""Disassemble object to a file.""" + p = subprocess.Popen([disassembler, '-d', objfile], + stdout=subprocess.PIPE, + stderr=subprocess.PIPE) + (out, err) = p.communicate() + if p.returncode or err: + print("Disassemble failed: {}".format(objfile)) + sys.exit(1) + return filter(keep_line, out.split(os.linesep)) + +def first_diff(a, b, fromfile, tofile): + """Returns the first few lines of a difference, if there is one. Python + diff can be very slow with large objects and the most interesting changes + are the first ones. Truncate data before sending to difflib. Returns None + is there is no difference.""" + + # Find first diff + first_diff_idx = None + for idx, val in enumerate(a): + if val != b[idx]: + first_diff_idx = idx + break + + if first_diff_idx == None: + # No difference + return None + + # Diff to first line of diff plus some lines + context = 3 + diff = difflib.unified_diff(a[:first_diff_idx+context], + b[:first_diff_idx+context], + fromfile, + tofile) + difference = "\n".join(diff) + if first_diff_idx + context < len(a): + difference += "\n*** Diff truncated ***" + return difference + +def compare_object_files(objfilea, objfileb): + """Compare disassembly of two different files. + Allowing unavoidable differences, such as filenames. + Return the first difference if the disassembly differs, or None. 
+ """ + disa = disassemble(objfilea) + disb = disassemble(objfileb) + return first_diff(disa, disb, objfilea, objfileb) + +if __name__ == '__main__': + parser = argparse.ArgumentParser() + parser.add_argument('objfilea', nargs=1) + parser.add_argument('objfileb', nargs=1) + parser.add_argument('-v', '--verbose', action='store_true') + args = parser.parse_args() + diff = compare_object_files(args.objfilea[0], args.objfileb[0]) + if diff: + print("Difference detected") + if args.verbose: + print(diff) + sys.exit(1) + else: + print("The same") diff --git a/utils/check_cfc/setup.py b/utils/check_cfc/setup.py new file mode 100644 index 0000000000..b5fc473639 --- /dev/null +++ b/utils/check_cfc/setup.py @@ -0,0 +1,21 @@ +"""For use on Windows. Run with: + python.exe setup.py py2exe + """ +from distutils.core import setup +try: + import py2exe +except ImportError: + import platform + import sys + if platform.system() == 'Windows': + print "Could not find py2exe. Please install then run setup.py py2exe." + raise + else: + print "setup.py only required on Windows." 
+ sys.exit(1) + +setup( + console=['check_cfc.py'], + name="Check CFC", + description='Check Compile Flow Consistency' + ) diff --git a/utils/check_cfc/test_check_cfc.py b/utils/check_cfc/test_check_cfc.py new file mode 100755 index 0000000000..0eee5b8384 --- /dev/null +++ b/utils/check_cfc/test_check_cfc.py @@ -0,0 +1,158 @@ +#!/usr/bin/env python2.7 + +"""Test internal functions within check_cfc.py.""" + +import check_cfc +import os +import platform +import unittest + + +class TestCheckCFC(unittest.TestCase): + + def test_flip_dash_g(self): + self.assertIn('-g', check_cfc.flip_dash_g(['clang', '-c'])) + self.assertNotIn('-g', check_cfc.flip_dash_g(['clang', '-c', '-g'])) + self.assertNotIn( + '-g', check_cfc.flip_dash_g(['clang', '-g', '-c', '-g'])) + + def test_remove_dir_from_path(self): + bin_path = r'/usr/bin' + space_path = r'/home/user/space in path' + superstring_path = r'/usr/bin/local' + + # Test removing last thing in path + self.assertNotIn( + bin_path, check_cfc.remove_dir_from_path(bin_path, bin_path)) + + # Test removing one entry and leaving others + # Also tests removing repeated path + path_var = os.pathsep.join( + [superstring_path, bin_path, space_path, bin_path]) + stripped_path_var = check_cfc.remove_dir_from_path(path_var, bin_path) + self.assertIn(superstring_path, stripped_path_var) + self.assertNotIn(bin_path, stripped_path_var.split(os.pathsep)) + self.assertIn(space_path, stripped_path_var) + + # Test removing non-canonical path + self.assertNotIn(r'/usr//bin', + check_cfc.remove_dir_from_path(r'/usr//bin', bin_path)) + + if platform == 'Windows': + # Windows is case insensitive so should remove a different case + # path + self.assertNotIn( + bin_path, check_cfc.remove_dir_from_path(path_var, r'/USR/BIN')) + else: + # Case sensitive so will not remove different case path + self.assertIn( + bin_path, check_cfc.remove_dir_from_path(path_var, r'/USR/BIN')) + + def test_is_output_specified(self): + self.assertTrue( + 
check_cfc.is_output_specified(['clang', '-o', 'test.o'])) + self.assertTrue(check_cfc.is_output_specified(['clang', '-otest.o'])) + self.assertFalse( + check_cfc.is_output_specified(['clang', '-gline-tables-only'])) + # Not specified for implied output file name + self.assertFalse(check_cfc.is_output_specified(['clang', 'test.c'])) + + def test_get_output_file(self): + self.assertEqual( + check_cfc.get_output_file(['clang', '-o', 'test.o']), 'test.o') + self.assertEqual( + check_cfc.get_output_file(['clang', '-otest.o']), 'test.o') + self.assertIsNone( + check_cfc.get_output_file(['clang', '-gline-tables-only'])) + # Can't get output file if more than one input file + self.assertIsNone( + check_cfc.get_output_file(['clang', '-c', 'test.cpp', 'test2.cpp'])) + # No output file specified + self.assertIsNone(check_cfc.get_output_file(['clang', '-c', 'test.c'])) + + def test_derive_output_file(self): + # Test getting implicit output file + self.assertEqual( + check_cfc.derive_output_file(['clang', '-c', 'test.c']), 'test.o') + self.assertEqual( + check_cfc.derive_output_file(['clang', '-c', 'test.cpp']), 'test.o') + self.assertIsNone(check_cfc.derive_output_file(['clang', '--version'])) + + def test_is_normal_compile(self): + self.assertTrue(check_cfc.is_normal_compile( + ['clang', '-c', 'test.cpp', '-o', 'test2.o'])) + self.assertTrue( + check_cfc.is_normal_compile(['clang', '-c', 'test.cpp'])) + # Outputting bitcode is not a normal compile + self.assertFalse( + check_cfc.is_normal_compile(['clang', '-c', 'test.cpp', '-flto'])) + self.assertFalse( + check_cfc.is_normal_compile(['clang', '-c', 'test.cpp', '-emit-llvm'])) + # Outputting preprocessed output or assembly is not a normal compile + self.assertFalse( + check_cfc.is_normal_compile(['clang', '-E', 'test.cpp', '-o', 'test.ii'])) + self.assertFalse( + check_cfc.is_normal_compile(['clang', '-S', 'test.cpp', '-o', 'test.s'])) + # Input of preprocessed or assembly is not a "normal compile" + self.assertFalse( + 
check_cfc.is_normal_compile(['clang', '-c', 'test.s', '-o', 'test.o'])) + self.assertFalse( + check_cfc.is_normal_compile(['clang', '-c', 'test.ii', '-o', 'test.o'])) + # Specifying --version and -c is not a normal compile + self.assertFalse( + check_cfc.is_normal_compile(['clang', '-c', 'test.cpp', '--version'])) + self.assertFalse( + check_cfc.is_normal_compile(['clang', '-c', 'test.cpp', '--help'])) + + def test_replace_output_file(self): + self.assertEqual(check_cfc.replace_output_file( + ['clang', '-o', 'test.o'], 'testg.o'), ['clang', '-o', 'testg.o']) + self.assertEqual(check_cfc.replace_output_file( + ['clang', '-otest.o'], 'testg.o'), ['clang', '-otestg.o']) + with self.assertRaises(Exception): + check_cfc.replace_output_file(['clang'], 'testg.o') + + def test_add_output_file(self): + self.assertEqual(check_cfc.add_output_file( + ['clang'], 'testg.o'), ['clang', '-o', 'testg.o']) + + def test_set_output_file(self): + # Test output not specified + self.assertEqual( + check_cfc.set_output_file(['clang'], 'test.o'), ['clang', '-o', 'test.o']) + # Test output is specified + self.assertEqual(check_cfc.set_output_file( + ['clang', '-o', 'test.o'], 'testb.o'), ['clang', '-o', 'testb.o']) + + def test_get_input_file(self): + # No input file + self.assertIsNone(check_cfc.get_input_file(['clang'])) + # Input C file + self.assertEqual( + check_cfc.get_input_file(['clang', 'test.c']), 'test.c') + # Input C++ file + self.assertEqual( + check_cfc.get_input_file(['clang', 'test.cpp']), 'test.cpp') + # Multiple input files + self.assertIsNone( + check_cfc.get_input_file(['clang', 'test.c', 'test2.cpp'])) + self.assertIsNone( + check_cfc.get_input_file(['clang', 'test.c', 'test2.c'])) + # Don't handle preprocessed files + self.assertIsNone(check_cfc.get_input_file(['clang', 'test.i'])) + self.assertIsNone(check_cfc.get_input_file(['clang', 'test.ii'])) + # Test identifying input file with quotes + self.assertEqual( + check_cfc.get_input_file(['clang', '"test.c"']), 
'"test.c"') + self.assertEqual( + check_cfc.get_input_file(['clang', "'test.c'"]), "'test.c'") + # Test multiple quotes + self.assertEqual( + check_cfc.get_input_file(['clang', "\"'test.c'\""]), "\"'test.c'\"") + + def test_set_input_file(self): + self.assertEqual(check_cfc.set_input_file( + ['clang', 'test.c'], 'test.s'), ['clang', 'test.s']) + +if __name__ == '__main__': + unittest.main() -- 2.40.0