From 725593eab9cbc63357d3d08f1e13721d6f97e55c Mon Sep 17 00:00:00 2001 From: "Joel E. Denny" Date: Sat, 12 Oct 2019 11:57:20 +0000 Subject: [PATCH] Reland r374389: [lit] Clean up internal diff's encoding handling To avoid breaking some tests, D66574, D68664, D67643, and D68668 landed together. However, D68664 introduced an issue now addressed by D68839, with which these are now all relanding. Differential Revision: https://reviews.llvm.org/D68664 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@374649 91177308-0d34-0410-b5e6-96231b3b80d8 --- utils/lit/lit/builtin_commands/diff.py | 51 +++++++---------- .../Inputs/shtest-shell/diff-encodings.txt | 9 +++ .../lit/tests/Inputs/shtest-shell/diff-in.bin | Bin 0 -> 26 bytes .../tests/Inputs/shtest-shell/diff-in.utf16 | Bin 0 -> 24 bytes .../tests/Inputs/shtest-shell/diff-in.utf8 | 3 + utils/lit/tests/max-failures.py | 2 +- utils/lit/tests/shtest-shell.py | 54 +++++++++++++++++- 7 files changed, 86 insertions(+), 33 deletions(-) create mode 100644 utils/lit/tests/Inputs/shtest-shell/diff-encodings.txt create mode 100644 utils/lit/tests/Inputs/shtest-shell/diff-in.bin create mode 100644 utils/lit/tests/Inputs/shtest-shell/diff-in.utf16 create mode 100644 utils/lit/tests/Inputs/shtest-shell/diff-in.utf8 diff --git a/utils/lit/lit/builtin_commands/diff.py b/utils/lit/lit/builtin_commands/diff.py index 885b425c95f..562b9ac37ac 100644 --- a/utils/lit/lit/builtin_commands/diff.py +++ b/utils/lit/lit/builtin_commands/diff.py @@ -1,6 +1,7 @@ import difflib import functools import getopt +import locale import os import sys @@ -24,37 +25,26 @@ def getDirTree(path, basedir=""): return path, sorted(child_trees) def compareTwoFiles(flags, filepaths): - compare_bytes = False - encoding = None filelines = [] for file in filepaths: - try: - with open(file, 'r') as f: - filelines.append(f.readlines()) - except UnicodeDecodeError: - try: - with io.open(file, 'r', encoding="utf-8") as f: - filelines.append(f.readlines()) - encoding = "utf-8" - except: - compare_bytes = True - - if compare_bytes: - return compareTwoBinaryFiles(flags, filepaths) - else: - return compareTwoTextFiles(flags, filepaths, encoding) + with open(file, 'rb') as file_bin: + filelines.append(file_bin.readlines()) -def compareTwoBinaryFiles(flags, filepaths): - filelines = [] - for file in filepaths: - with open(file, 'rb') as f: - filelines.append(f.readlines()) + try: + return compareTwoTextFiles(flags, filepaths, filelines, + locale.getpreferredencoding(False)) + except UnicodeDecodeError: + try: + return compareTwoTextFiles(flags, filepaths, filelines, "utf-8") + except: + return compareTwoBinaryFiles(flags, filepaths, filelines) +def compareTwoBinaryFiles(flags, filepaths, filelines): exitCode = 0 if hasattr(difflib, 'diff_bytes'): # python 3.5 or newer diffs = difflib.diff_bytes(difflib.unified_diff, filelines[0], filelines[1], filepaths[0].encode(), filepaths[1].encode()) - diffs = [diff.decode() for diff in diffs] + diffs = [diff.decode(errors="backslashreplace") for diff in diffs] else: # python 2.7 if flags.unified_diff: @@ -68,15 +58,14 @@ def compareTwoBinaryFiles(flags, filepaths): exitCode = 1 return exitCode -def compareTwoTextFiles(flags, filepaths, encoding): +def compareTwoTextFiles(flags, filepaths, filelines_bin, encoding): filelines = [] - for file in filepaths: - if encoding is None: - with open(file, 'r') as f: - filelines.append(f.readlines()) - else: - with io.open(file, 'r', encoding=encoding) as f: - filelines.append(f.readlines()) + for lines_bin in filelines_bin: + lines = [] + for line_bin in lines_bin: + line = line_bin.decode(encoding=encoding) + lines.append(line) + filelines.append(lines) exitCode = 0 def compose2(f, g): diff --git a/utils/lit/tests/Inputs/shtest-shell/diff-encodings.txt b/utils/lit/tests/Inputs/shtest-shell/diff-encodings.txt new file mode 100644 index 00000000000..d8b9718a099 --- /dev/null +++ b/utils/lit/tests/Inputs/shtest-shell/diff-encodings.txt @@ -0,0 +1,9 @@ +# Check that diff falls back to binary mode if it cannot decode a file. + +# RUN: diff -u diff-in.bin diff-in.bin +# RUN: diff -u diff-in.utf16 diff-in.bin && false || true +# RUN: diff -u diff-in.utf8 diff-in.bin && false || true +# RUN: diff -u diff-in.bin diff-in.utf8 && false || true + +# Fail so lit will print output. +# RUN: false diff --git a/utils/lit/tests/Inputs/shtest-shell/diff-in.bin b/utils/lit/tests/Inputs/shtest-shell/diff-in.bin new file mode 100644 index 0000000000000000000000000000000000000000..06b800b707c1ade254fa995363aa211d096b534e GIT binary patch literal 26 acmZQbW5@?WE`}tAM24dO|DmiZ1}*?iD+SX4 literal 0 HcmV?d00001 diff --git a/utils/lit/tests/Inputs/shtest-shell/diff-in.utf16 b/utils/lit/tests/Inputs/shtest-shell/diff-in.utf16 new file mode 100644 index 0000000000000000000000000000000000000000..d7d9feefa7da8341244deff11b6ea91b4d36b8f9 GIT binary patch literal 24 YcmZQbW5@?WE`}tAM1~?LUB$o!05LiP)c^nh literal 0 HcmV?d00001 diff --git a/utils/lit/tests/Inputs/shtest-shell/diff-in.utf8 b/utils/lit/tests/Inputs/shtest-shell/diff-in.utf8 new file mode 100644 index 00000000000..86e041dad66 --- /dev/null +++ b/utils/lit/tests/Inputs/shtest-shell/diff-in.utf8 @@ -0,0 +1,3 @@ +foo +bar +baz diff --git a/utils/lit/tests/max-failures.py b/utils/lit/tests/max-failures.py index cee06fa255d..5149a91ec00 100644 --- a/utils/lit/tests/max-failures.py +++ b/utils/lit/tests/max-failures.py @@ -8,7 +8,7 @@ # # END. -# CHECK: Failing Tests (27) +# CHECK: Failing Tests (28) # CHECK: Failing Tests (1) # CHECK: Failing Tests (2) # CHECK: error: argument --max-failures: requires positive integer, but found '0' diff --git a/utils/lit/tests/shtest-shell.py b/utils/lit/tests/shtest-shell.py index 3978e4470a9..6d9b1aa459c 100644 --- a/utils/lit/tests/shtest-shell.py +++ b/utils/lit/tests/shtest-shell.py @@ -34,6 +34,58 @@ # CHECK: error: command failed with exit status: 127 # CHECK: *** + +# CHECK: FAIL: shtest-shell :: diff-encodings.txt +# CHECK: *** TEST 'shtest-shell :: diff-encodings.txt' FAILED *** + +# CHECK: $ "diff" "-u" "diff-in.bin" "diff-in.bin" +# CHECK-NOT: error + +# CHECK: $ "diff" "-u" "diff-in.utf16" "diff-in.bin" +# CHECK: # command output: +# CHECK-NEXT: --- +# CHECK-NEXT: +++ +# CHECK-NEXT: @@ +# CHECK-NEXT: {{^ .f.o.o.$}} +# CHECK-NEXT: {{^-.b.a.r.$}} +# CHECK-NEXT: {{^\+.b.a.r..}} +# CHECK-NEXT: {{^ .b.a.z.$}} +# CHECK: error: command failed with exit status: 1 +# CHECK: $ "true" + +# CHECK: $ "diff" "-u" "diff-in.utf8" "diff-in.bin" +# CHECK: # command output: +# CHECK-NEXT: --- +# CHECK-NEXT: +++ +# CHECK-NEXT: @@ +# CHECK-NEXT: -foo +# CHECK-NEXT: -bar +# CHECK-NEXT: -baz +# CHECK-NEXT: {{^\+.f.o.o.$}} +# CHECK-NEXT: {{^\+.b.a.r..}} +# CHECK-NEXT: {{^\+.b.a.z.$}} +# CHECK: error: command failed with exit status: 1 +# CHECK: $ "true" + +# CHECK: $ "diff" "-u" "diff-in.bin" "diff-in.utf8" +# CHECK: # command output: +# CHECK-NEXT: --- +# CHECK-NEXT: +++ +# CHECK-NEXT: @@ +# CHECK-NEXT: {{^\-.f.o.o.$}} +# CHECK-NEXT: {{^\-.b.a.r..}} +# CHECK-NEXT: {{^\-.b.a.z.$}} +# CHECK-NEXT: +foo +# CHECK-NEXT: +bar +# CHECK-NEXT: +baz +# CHECK: error: command failed with exit status: 1 +# CHECK: $ "true" + +# CHECK: $ "false" + +# CHECK: *** + + # CHECK: FAIL: shtest-shell :: diff-error-1.txt # CHECK: *** TEST 'shtest-shell :: diff-error-1.txt' FAILED *** # CHECK: $ "diff" "-B" "temp1.txt" "temp2.txt" @@ -245,4 +297,4 @@ # CHECK: PASS: shtest-shell :: sequencing-0.txt # CHECK: XFAIL: shtest-shell :: sequencing-1.txt # CHECK: PASS: shtest-shell :: valid-shell.txt -# CHECK: Failing Tests (27) +# CHECK: Failing Tests (28) -- 2.40.0