From 856e3fe4fff46dc9a57e2fc886dec6881c35b4ea Mon Sep 17 00:00:00 2001 From: Dmitri Gribenko Date: Thu, 10 Oct 2019 14:27:14 +0000 Subject: [PATCH] Revert "[FileCheck] Implement --ignore-case option." This reverts commit r374339. It broke tests: http://lab.llvm.org:8011/builders/clang-x86_64-debian-fast/builds/19066 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@374359 91177308-0d34-0410-b5e6-96231b3b80d8 --- docs/CommandGuide/FileCheck.rst | 1407 +++++---- include/llvm/Support/FileCheck.h | 361 ++- lib/Support/FileCheck.cpp | 3975 +++++++++++++------------- lib/Support/FileCheckImpl.h | 1245 ++++---- test/FileCheck/check-ignore-case.txt | 45 - utils/FileCheck/FileCheck.cpp | 1307 +++++---- 6 files changed, 4138 insertions(+), 4202 deletions(-) delete mode 100644 test/FileCheck/check-ignore-case.txt diff --git a/docs/CommandGuide/FileCheck.rst b/docs/CommandGuide/FileCheck.rst index 7d8ecaa7bfa..e8b324d080d 100644 --- a/docs/CommandGuide/FileCheck.rst +++ b/docs/CommandGuide/FileCheck.rst @@ -1,706 +1,701 @@ -FileCheck - Flexible pattern matching file verifier -=================================================== - -.. program:: FileCheck - -SYNOPSIS --------- - -:program:`FileCheck` *match-filename* [*--check-prefix=XXX*] [*--strict-whitespace*] - -DESCRIPTION ------------ - -:program:`FileCheck` reads two files (one from standard input, and one -specified on the command line) and uses one to verify the other. This -behavior is particularly useful for the testsuite, which wants to verify that -the output of some tool (e.g. :program:`llc`) contains the expected information -(for example, a movsd from esp or whatever is interesting). This is similar to -using :program:`grep`, but it is optimized for matching multiple different -inputs in one file in a specific order. - -The ``match-filename`` file specifies the file that contains the patterns to -match. The file to verify is read from standard input unless the -:option:`--input-file` option is used. 
- -OPTIONS -------- - -Options are parsed from the environment variable ``FILECHECK_OPTS`` -and from the command line. - -.. option:: -help - - Print a summary of command line options. - -.. option:: --check-prefix prefix - - FileCheck searches the contents of ``match-filename`` for patterns to - match. By default, these patterns are prefixed with "``CHECK:``". - If you'd like to use a different prefix (e.g. because the same input - file is checking multiple different tool or options), the - :option:`--check-prefix` argument allows you to specify one or more - prefixes to match. Multiple prefixes are useful for tests which might - change for different run options, but most lines remain the same. - -.. option:: --check-prefixes prefix1,prefix2,... - - An alias of :option:`--check-prefix` that allows multiple prefixes to be - specified as a comma separated list. - -.. option:: --input-file filename - - File to check (defaults to stdin). - -.. option:: --match-full-lines - - By default, FileCheck allows matches of anywhere on a line. This - option will require all positive matches to cover an entire - line. Leading and trailing whitespace is ignored, unless - :option:`--strict-whitespace` is also specified. (Note: negative - matches from ``CHECK-NOT`` are not affected by this option!) - - Passing this option is equivalent to inserting ``{{^ *}}`` or - ``{{^}}`` before, and ``{{ *$}}`` or ``{{$}}`` after every positive - check pattern. - -.. option:: --strict-whitespace - - By default, FileCheck canonicalizes input horizontal whitespace (spaces and - tabs) which causes it to ignore these differences (a space will match a tab). - The :option:`--strict-whitespace` argument disables this behavior. End-of-line - sequences are canonicalized to UNIX-style ``\n`` in all modes. - -.. option:: --ignore-case - - By default, FileCheck uses case-sensitive matching. This option causes - FileCheck to use case-insensitive matching. - -.. 
option:: --implicit-check-not check-pattern - - Adds implicit negative checks for the specified patterns between positive - checks. The option allows writing stricter tests without stuffing them with - ``CHECK-NOT``\ s. - - For example, "``--implicit-check-not warning:``" can be useful when testing - diagnostic messages from tools that don't have an option similar to ``clang - -verify``. With this option FileCheck will verify that input does not contain - warnings not covered by any ``CHECK:`` patterns. - -.. option:: --dump-input - - Dump input to stderr, adding annotations representing currently enabled - diagnostics. Do this either 'always', on 'fail', or 'never'. Specify 'help' - to explain the dump format and quit. - -.. option:: --dump-input-on-failure - - When the check fails, dump all of the original input. This option is - deprecated in favor of `--dump-input=fail`. - -.. option:: --enable-var-scope - - Enables scope for regex variables. - - Variables with names that start with ``$`` are considered global and - remain set throughout the file. - - All other variables get undefined after each encountered ``CHECK-LABEL``. - -.. option:: -D<VAR=VALUE> - - Sets a filecheck pattern variable ``VAR`` with value ``VALUE`` that can be - used in ``CHECK:`` lines. - -.. option:: -D#<NUMVAR>=<NUMERIC EXPRESSION> - - Sets a filecheck numeric variable ``NUMVAR`` to the result of evaluating - ``<NUMERIC EXPRESSION>`` that can be used in ``CHECK:`` lines. See section - ``FileCheck Numeric Variables and Expressions`` for details on supported - numeric expressions. - -.. option:: -version - - Show the version number of this program. - -.. option:: -v - - Print good directive pattern matches. However, if ``-input-dump=fail`` or - ``-input-dump=always``, add those matches as input annotations instead. - -.. option:: -vv - - Print information helpful in diagnosing internal FileCheck issues, such as - discarded overlapping ``CHECK-DAG:`` matches, implicit EOF pattern matches, - and ``CHECK-NOT:`` patterns that do not have matches. 
Implies ``-v``. - However, if ``-input-dump=fail`` or ``-input-dump=always``, just add that - information as input annotations instead. - -.. option:: --allow-deprecated-dag-overlap - - Enable overlapping among matches in a group of consecutive ``CHECK-DAG:`` - directives. This option is deprecated and is only provided for convenience - as old tests are migrated to the new non-overlapping ``CHECK-DAG:`` - implementation. - -.. option:: --color - - Use colors in output (autodetected by default). - -EXIT STATUS ------------ - -If :program:`FileCheck` verifies that the file matches the expected contents, -it exits with 0. Otherwise, if not, or if an error occurs, it will exit with a -non-zero value. - -TUTORIAL --------- - -FileCheck is typically used from LLVM regression tests, being invoked on the RUN -line of the test. A simple example of using FileCheck from a RUN line looks -like this: - -.. code-block:: llvm - - ; RUN: llvm-as < %s | llc -march=x86-64 | FileCheck %s - -This syntax says to pipe the current file ("``%s``") into ``llvm-as``, pipe -that into ``llc``, then pipe the output of ``llc`` into ``FileCheck``. This -means that FileCheck will be verifying its standard input (the llc output) -against the filename argument specified (the original ``.ll`` file specified by -"``%s``"). To see how this works, let's look at the rest of the ``.ll`` file -(after the RUN line): - -.. code-block:: llvm - - define void @sub1(i32* %p, i32 %v) { - entry: - ; CHECK: sub1: - ; CHECK: subl - %0 = tail call i32 @llvm.atomic.load.sub.i32.p0i32(i32* %p, i32 %v) - ret void - } - - define void @inc4(i64* %p) { - entry: - ; CHECK: inc4: - ; CHECK: incq - %0 = tail call i64 @llvm.atomic.load.add.i64.p0i64(i64* %p, i64 1) - ret void - } - -Here you can see some "``CHECK:``" lines specified in comments. Now you can -see how the file is piped into ``llvm-as``, then ``llc``, and the machine code -output is what we are verifying. 
FileCheck checks the machine code output to -verify that it matches what the "``CHECK:``" lines specify. - -The syntax of the "``CHECK:``" lines is very simple: they are fixed strings that -must occur in order. FileCheck defaults to ignoring horizontal whitespace -differences (e.g. a space is allowed to match a tab) but otherwise, the contents -of the "``CHECK:``" line is required to match some thing in the test file exactly. - -One nice thing about FileCheck (compared to grep) is that it allows merging -test cases together into logical groups. For example, because the test above -is checking for the "``sub1:``" and "``inc4:``" labels, it will not match -unless there is a "``subl``" in between those labels. If it existed somewhere -else in the file, that would not count: "``grep subl``" matches if "``subl``" -exists anywhere in the file. - -The FileCheck -check-prefix option -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -The FileCheck `-check-prefix` option allows multiple test -configurations to be driven from one `.ll` file. This is useful in many -circumstances, for example, testing different architectural variants with -:program:`llc`. Here's a simple example: - -.. code-block:: llvm - - ; RUN: llvm-as < %s | llc -mtriple=i686-apple-darwin9 -mattr=sse41 \ - ; RUN: | FileCheck %s -check-prefix=X32 - ; RUN: llvm-as < %s | llc -mtriple=x86_64-apple-darwin9 -mattr=sse41 \ - ; RUN: | FileCheck %s -check-prefix=X64 - - define <4 x i32> @pinsrd_1(i32 %s, <4 x i32> %tmp) nounwind { - %tmp1 = insertelement <4 x i32>; %tmp, i32 %s, i32 1 - ret <4 x i32> %tmp1 - ; X32: pinsrd_1: - ; X32: pinsrd $1, 4(%esp), %xmm0 - - ; X64: pinsrd_1: - ; X64: pinsrd $1, %edi, %xmm0 - } - -In this case, we're testing that we get the expected code generation with -both 32-bit and 64-bit code generation. 
- -The "CHECK-NEXT:" directive -~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Sometimes you want to match lines and would like to verify that matches -happen on exactly consecutive lines with no other lines in between them. In -this case, you can use "``CHECK:``" and "``CHECK-NEXT:``" directives to specify -this. If you specified a custom check prefix, just use "``<PREFIX>-NEXT:``". -For example, something like this works as you'd expect: - -.. code-block:: llvm - - define void @t2(<2 x double>* %r, <2 x double>* %A, double %B) { - %tmp3 = load <2 x double>* %A, align 16 - %tmp7 = insertelement <2 x double> undef, double %B, i32 0 - %tmp9 = shufflevector <2 x double> %tmp3, - <2 x double> %tmp7, - <2 x i32> < i32 0, i32 2 > - store <2 x double> %tmp9, <2 x double>* %r, align 16 - ret void - - ; CHECK: t2: - ; CHECK: movl 8(%esp), %eax - ; CHECK-NEXT: movapd (%eax), %xmm0 - ; CHECK-NEXT: movhpd 12(%esp), %xmm0 - ; CHECK-NEXT: movl 4(%esp), %eax - ; CHECK-NEXT: movapd %xmm0, (%eax) - ; CHECK-NEXT: ret - } - -"``CHECK-NEXT:``" directives reject the input unless there is exactly one -newline between it and the previous directive. A "``CHECK-NEXT:``" cannot be -the first directive in a file. - -The "CHECK-SAME:" directive -~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Sometimes you want to match lines and would like to verify that matches happen -on the same line as the previous match. In this case, you can use "``CHECK:``" -and "``CHECK-SAME:``" directives to specify this. If you specified a custom -check prefix, just use "``<PREFIX>-SAME:``". - -"``CHECK-SAME:``" is particularly powerful in conjunction with "``CHECK-NOT:``" -(described below). - -For example, the following works like you'd expect: - -.. code-block:: llvm - - !0 = !DILocation(line: 5, scope: !1, inlinedAt: !2) - - ; CHECK: !DILocation(line: 5, - ; CHECK-NOT: column: - ; CHECK-SAME: scope: ![[SCOPE:[0-9]+]] - -"``CHECK-SAME:``" directives reject the input if there are any newlines between -it and the previous directive. 
A "``CHECK-SAME:``" cannot be the first -directive in a file. - -The "CHECK-EMPTY:" directive -~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -If you need to check that the next line has nothing on it, not even whitespace, -you can use the "``CHECK-EMPTY:``" directive. - -.. code-block:: llvm - - declare void @foo() - - declare void @bar() - ; CHECK: foo - ; CHECK-EMPTY: - ; CHECK-NEXT: bar - -Just like "``CHECK-NEXT:``" the directive will fail if there is more than one -newline before it finds the next blank line, and it cannot be the first -directive in a file. - -The "CHECK-NOT:" directive -~~~~~~~~~~~~~~~~~~~~~~~~~~ - -The "``CHECK-NOT:``" directive is used to verify that a string doesn't occur -between two matches (or before the first match, or after the last match). For -example, to verify that a load is removed by a transformation, a test like this -can be used: - -.. code-block:: llvm - - define i8 @coerce_offset0(i32 %V, i32* %P) { - store i32 %V, i32* %P - - %P2 = bitcast i32* %P to i8* - %P3 = getelementptr i8* %P2, i32 2 - - %A = load i8* %P3 - ret i8 %A - ; CHECK: @coerce_offset0 - ; CHECK-NOT: load - ; CHECK: ret i8 - } - -The "CHECK-COUNT:" directive -~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -If you need to match multiple lines with the same pattern over and over again -you can repeat a plain ``CHECK:`` as many times as needed. If that looks too -boring you can instead use a counted check "``CHECK-COUNT-<num>:``", where -``<num>`` is a positive decimal number. It will match the pattern exactly -``<num>`` times, no more and no less. If you specified a custom check prefix, -just use "``<PREFIX>-COUNT-<num>:``" for the same effect. -Here is a simple example: - -.. 
code-block:: text - - Loop at depth 1 - Loop at depth 1 - Loop at depth 1 - Loop at depth 1 - Loop at depth 2 - Loop at depth 3 - - ; CHECK-COUNT-6: Loop at depth {{[0-9]+}} - ; CHECK-NOT: Loop at depth {{[0-9]+}} - -The "CHECK-DAG:" directive -~~~~~~~~~~~~~~~~~~~~~~~~~~ - -If it's necessary to match strings that don't occur in a strictly sequential -order, "``CHECK-DAG:``" could be used to verify them between two matches (or -before the first match, or after the last match). For example, clang emits -vtable globals in reverse order. Using ``CHECK-DAG:``, we can keep the checks -in the natural order: - -.. code-block:: c++ - - // RUN: %clang_cc1 %s -emit-llvm -o - | FileCheck %s - - struct Foo { virtual void method(); }; - Foo f; // emit vtable - // CHECK-DAG: @_ZTV3Foo = - - struct Bar { virtual void method(); }; - Bar b; - // CHECK-DAG: @_ZTV3Bar = - -``CHECK-NOT:`` directives could be mixed with ``CHECK-DAG:`` directives to -exclude strings between the surrounding ``CHECK-DAG:`` directives. As a result, -the surrounding ``CHECK-DAG:`` directives cannot be reordered, i.e. all -occurrences matching ``CHECK-DAG:`` before ``CHECK-NOT:`` must not fall behind -occurrences matching ``CHECK-DAG:`` after ``CHECK-NOT:``. For example, - -.. code-block:: llvm - - ; CHECK-DAG: BEFORE - ; CHECK-NOT: NOT - ; CHECK-DAG: AFTER - -This case will reject input strings where ``BEFORE`` occurs after ``AFTER``. - -With captured variables, ``CHECK-DAG:`` is able to match valid topological -orderings of a DAG with edges from the definition of a variable to its use. -It's useful, e.g., when your test cases need to match different output -sequences from the instruction scheduler. For example, - -.. code-block:: llvm - - ; CHECK-DAG: add [[REG1:r[0-9]+]], r1, r2 - ; CHECK-DAG: add [[REG2:r[0-9]+]], r3, r4 - ; CHECK: mul r5, [[REG1]], [[REG2]] - -In this case, any order of that two ``add`` instructions will be allowed. 
- -If you are defining `and` using variables in the same ``CHECK-DAG:`` block, -be aware that the definition rule can match `after` its use. - -So, for instance, the code below will pass: - -.. code-block:: text - - ; CHECK-DAG: vmov.32 [[REG2:d[0-9]+]][0] - ; CHECK-DAG: vmov.32 [[REG2]][1] - vmov.32 d0[1] - vmov.32 d0[0] - -While this other code, will not: - -.. code-block:: text - - ; CHECK-DAG: vmov.32 [[REG2:d[0-9]+]][0] - ; CHECK-DAG: vmov.32 [[REG2]][1] - vmov.32 d1[1] - vmov.32 d0[0] - -While this can be very useful, it's also dangerous, because in the case of -register sequence, you must have a strong order (read before write, copy before -use, etc). If the definition your test is looking for doesn't match (because -of a bug in the compiler), it may match further away from the use, and mask -real bugs away. - -In those cases, to enforce the order, use a non-DAG directive between DAG-blocks. - -A ``CHECK-DAG:`` directive skips matches that overlap the matches of any -preceding ``CHECK-DAG:`` directives in the same ``CHECK-DAG:`` block. Not only -is this non-overlapping behavior consistent with other directives, but it's -also necessary to handle sets of non-unique strings or patterns. For example, -the following directives look for unordered log entries for two tasks in a -parallel program, such as the OpenMP runtime: - -.. code-block:: text - - // CHECK-DAG: [[THREAD_ID:[0-9]+]]: task_begin - // CHECK-DAG: [[THREAD_ID]]: task_end - // - // CHECK-DAG: [[THREAD_ID:[0-9]+]]: task_begin - // CHECK-DAG: [[THREAD_ID]]: task_end - -The second pair of directives is guaranteed not to match the same log entries -as the first pair even though the patterns are identical and even if the text -of the log entries is identical because the thread ID manages to be reused. 
- -The "CHECK-LABEL:" directive -~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Sometimes in a file containing multiple tests divided into logical blocks, one -or more ``CHECK:`` directives may inadvertently succeed by matching lines in a -later block. While an error will usually eventually be generated, the check -flagged as causing the error may not actually bear any relationship to the -actual source of the problem. - -In order to produce better error messages in these cases, the "``CHECK-LABEL:``" -directive can be used. It is treated identically to a normal ``CHECK`` -directive except that FileCheck makes an additional assumption that a line -matched by the directive cannot also be matched by any other check present in -``match-filename``; this is intended to be used for lines containing labels or -other unique identifiers. Conceptually, the presence of ``CHECK-LABEL`` divides -the input stream into separate blocks, each of which is processed independently, -preventing a ``CHECK:`` directive in one block matching a line in another block. -If ``--enable-var-scope`` is in effect, all local variables are cleared at the -beginning of the block. - -For example, - -.. code-block:: llvm - - define %struct.C* @C_ctor_base(%struct.C* %this, i32 %x) { - entry: - ; CHECK-LABEL: C_ctor_base: - ; CHECK: mov [[SAVETHIS:r[0-9]+]], r0 - ; CHECK: bl A_ctor_base - ; CHECK: mov r0, [[SAVETHIS]] - %0 = bitcast %struct.C* %this to %struct.A* - %call = tail call %struct.A* @A_ctor_base(%struct.A* %0) - %1 = bitcast %struct.C* %this to %struct.B* - %call2 = tail call %struct.B* @B_ctor_base(%struct.B* %1, i32 %x) - ret %struct.C* %this - } - - define %struct.D* @D_ctor_base(%struct.D* %this, i32 %x) { - entry: - ; CHECK-LABEL: D_ctor_base: - -The use of ``CHECK-LABEL:`` directives in this case ensures that the three -``CHECK:`` directives only accept lines corresponding to the body of the -``@C_ctor_base`` function, even if the patterns match lines found later in -the file. 
Furthermore, if one of these three ``CHECK:`` directives fail, -FileCheck will recover by continuing to the next block, allowing multiple test -failures to be detected in a single invocation. - -There is no requirement that ``CHECK-LABEL:`` directives contain strings that -correspond to actual syntactic labels in a source or output language: they must -simply uniquely match a single line in the file being verified. - -``CHECK-LABEL:`` directives cannot contain variable definitions or uses. - -FileCheck Regex Matching Syntax -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -All FileCheck directives take a pattern to match. -For most uses of FileCheck, fixed string matching is perfectly sufficient. For -some things, a more flexible form of matching is desired. To support this, -FileCheck allows you to specify regular expressions in matching strings, -surrounded by double braces: ``{{yourregex}}``. FileCheck implements a POSIX -regular expression matcher; it supports Extended POSIX regular expressions -(ERE). Because we want to use fixed string matching for a majority of what we -do, FileCheck has been designed to support mixing and matching fixed string -matching with regular expressions. This allows you to write things like this: - -.. code-block:: llvm - - ; CHECK: movhpd {{[0-9]+}}(%esp), {{%xmm[0-7]}} - -In this case, any offset from the ESP register will be allowed, and any xmm -register will be allowed. - -Because regular expressions are enclosed with double braces, they are -visually distinct, and you don't need to use escape characters within the double -braces like you would in C. In the rare case that you want to match double -braces explicitly from the input, you can use something ugly like -``{{[}][}]}}`` as your pattern. Or if you are using the repetition count -syntax, for example ``[[:xdigit:]]{8}`` to match exactly 8 hex digits, you -would need to add parentheses like this ``{{([[:xdigit:]]{8})}}`` to avoid -confusion with FileCheck's closing double-brace. 
- -FileCheck String Substitution Blocks -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -It is often useful to match a pattern and then verify that it occurs again -later in the file. For codegen tests, this can be useful to allow any -register, but verify that that register is used consistently later. To do -this, :program:`FileCheck` supports string substitution blocks that allow -string variables to be defined and substituted into patterns. Here is a simple -example: - -.. code-block:: llvm - - ; CHECK: test5: - ; CHECK: notw [[REGISTER:%[a-z]+]] - ; CHECK: andw {{.*}}[[REGISTER]] - -The first check line matches a regex ``%[a-z]+`` and captures it into the -string variable ``REGISTER``. The second line verifies that whatever is in -``REGISTER`` occurs later in the file after an "``andw``". :program:`FileCheck` -string substitution blocks are always contained in ``[[ ]]`` pairs, and string -variable names can be formed with the regex ``[a-zA-Z_][a-zA-Z0-9_]*``. If a -colon follows the name, then it is a definition of the variable; otherwise, it -is a substitution. - -:program:`FileCheck` variables can be defined multiple times, and substitutions -always get the latest value. Variables can also be substituted later on the -same line they were defined on. For example: - -.. code-block:: llvm - - ; CHECK: op [[REG:r[0-9]+]], [[REG]] - -Can be useful if you want the operands of ``op`` to be the same register, -and don't care exactly which register it is. - -If ``--enable-var-scope`` is in effect, variables with names that -start with ``$`` are considered to be global. All others variables are -local. All local variables get undefined at the beginning of each -CHECK-LABEL block. Global variables are not affected by CHECK-LABEL. -This makes it easier to ensure that individual tests are not affected -by variables set in preceding tests. 
- -FileCheck Numeric Substitution Blocks -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -:program:`FileCheck` also supports numeric substitution blocks that allow -defining numeric variables and checking for numeric values that satisfy a -numeric expression constraint based on those variables via a numeric -substitution. This allows ``CHECK:`` directives to verify a numeric relation -between two numbers, such as the need for consecutive registers to be used. - -The syntax to define a numeric variable is ``[[#<NUMVAR>:]]`` where -``<NUMVAR>`` is the name of the numeric variable to define to the matching -value. - -For example: - -.. code-block:: llvm - - ; CHECK: mov r[[#REG:]], 42 - -would match ``mov r5, 42`` and set ``REG`` to the value ``5``. - -The syntax of a numeric substitution is ``[[#<expr>]]`` where ``<expr>`` is an -expression. An expression is recursively defined as: - -* a numeric operand, or -* an expression followed by an operator and a numeric operand. - -A numeric operand is a previously defined numeric variable, or an integer -literal. The supported operators are ``+`` and ``-``. Spaces are accepted -before, after and between any of these elements. - -For example: - -.. code-block:: llvm - - ; CHECK: load r[[#REG:]], [r0] - ; CHECK: load r[[#REG+1]], [r1] - -The above example would match the text: - -.. code-block:: gas - - load r5, [r0] - load r6, [r1] - -but would not match the text: - -.. code-block:: gas - - load r5, [r0] - load r7, [r1] - -due to ``7`` being unequal to ``5 + 1``. - -The syntax also supports an empty expression, equivalent to writing {{[0-9]+}}, -for cases where the input must contain a numeric value but the value itself -does not matter: - -.. code-block:: gas - - ; CHECK-NOT: mov r0, r[[#]] - -to check that a value is synthesized rather than moved around. - -A numeric variable can also be defined to the result of a numeric expression, -in which case the numeric expression is checked and if verified the variable is -assigned to the value. 
The unified syntax for both defining numeric variables -and checking a numeric expression is thus ``[[#<NUMVAR>: <expr>]]`` with each -element as described previously. - -The ``--enable-var-scope`` option has the same effect on numeric variables as -on string variables. - -Important note: In its current implementation, an expression cannot use a -numeric variable defined earlier in the same CHECK directive. - -FileCheck Pseudo Numeric Variables -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Sometimes there's a need to verify output that contains line numbers of the -match file, e.g. when testing compiler diagnostics. This introduces a certain -fragility of the match file structure, as "``CHECK:``" lines contain absolute -line numbers in the same file, which have to be updated whenever line numbers -change due to text addition or deletion. - -To support this case, FileCheck expressions understand the ``@LINE`` pseudo -numeric variable which evaluates to the line number of the CHECK pattern where -it is found. - -This way match patterns can be put near the relevant test lines and include -relative line number references, for example: - -.. code-block:: c++ - - // CHECK: test.cpp:[[# @LINE + 4]]:6: error: expected ';' after top level declarator - // CHECK-NEXT: {{^int a}} - // CHECK-NEXT: {{^ \^}} - // CHECK-NEXT: {{^ ;}} - int a - -To support legacy uses of ``@LINE`` as a special string variable, -:program:`FileCheck` also accepts the following uses of ``@LINE`` with string -substitution block syntax: ``[[@LINE]]``, ``[[@LINE+<offset>]]`` and -``[[@LINE-<offset>]]`` without any spaces inside the brackets and where -``offset`` is an integer. - -Matching Newline Characters -~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -To match newline characters in regular expressions the character class -``[[:space:]]`` can be used. For example, the following pattern: - -.. 
code-block:: c++ - - // CHECK: DW_AT_location [DW_FORM_sec_offset] ([[DLOC:0x[0-9a-f]+]]){{[[:space:]].*}}"intd" - -matches output of the form (from llvm-dwarfdump): - -.. code-block:: text - - DW_AT_location [DW_FORM_sec_offset] (0x00000233) - DW_AT_name [DW_FORM_strp] ( .debug_str[0x000000c9] = "intd") - -letting us set the :program:`FileCheck` variable ``DLOC`` to the desired value -``0x00000233``, extracted from the line immediately preceding "``intd``". +FileCheck - Flexible pattern matching file verifier +=================================================== + +.. program:: FileCheck + +SYNOPSIS +-------- + +:program:`FileCheck` *match-filename* [*--check-prefix=XXX*] [*--strict-whitespace*] + +DESCRIPTION +----------- + +:program:`FileCheck` reads two files (one from standard input, and one +specified on the command line) and uses one to verify the other. This +behavior is particularly useful for the testsuite, which wants to verify that +the output of some tool (e.g. :program:`llc`) contains the expected information +(for example, a movsd from esp or whatever is interesting). This is similar to +using :program:`grep`, but it is optimized for matching multiple different +inputs in one file in a specific order. + +The ``match-filename`` file specifies the file that contains the patterns to +match. The file to verify is read from standard input unless the +:option:`--input-file` option is used. + +OPTIONS +------- + +Options are parsed from the environment variable ``FILECHECK_OPTS`` +and from the command line. + +.. option:: -help + + Print a summary of command line options. + +.. option:: --check-prefix prefix + + FileCheck searches the contents of ``match-filename`` for patterns to + match. By default, these patterns are prefixed with "``CHECK:``". + If you'd like to use a different prefix (e.g. 
because the same input + file is checking multiple different tool or options), the + :option:`--check-prefix` argument allows you to specify one or more + prefixes to match. Multiple prefixes are useful for tests which might + change for different run options, but most lines remain the same. + +.. option:: --check-prefixes prefix1,prefix2,... + + An alias of :option:`--check-prefix` that allows multiple prefixes to be + specified as a comma separated list. + +.. option:: --input-file filename + + File to check (defaults to stdin). + +.. option:: --match-full-lines + + By default, FileCheck allows matches of anywhere on a line. This + option will require all positive matches to cover an entire + line. Leading and trailing whitespace is ignored, unless + :option:`--strict-whitespace` is also specified. (Note: negative + matches from ``CHECK-NOT`` are not affected by this option!) + + Passing this option is equivalent to inserting ``{{^ *}}`` or + ``{{^}}`` before, and ``{{ *$}}`` or ``{{$}}`` after every positive + check pattern. + +.. option:: --strict-whitespace + + By default, FileCheck canonicalizes input horizontal whitespace (spaces and + tabs) which causes it to ignore these differences (a space will match a tab). + The :option:`--strict-whitespace` argument disables this behavior. End-of-line + sequences are canonicalized to UNIX-style ``\n`` in all modes. + +.. option:: --implicit-check-not check-pattern + + Adds implicit negative checks for the specified patterns between positive + checks. The option allows writing stricter tests without stuffing them with + ``CHECK-NOT``\ s. + + For example, "``--implicit-check-not warning:``" can be useful when testing + diagnostic messages from tools that don't have an option similar to ``clang + -verify``. With this option FileCheck will verify that input does not contain + warnings not covered by any ``CHECK:`` patterns. + +.. 
option:: --dump-input + + Dump input to stderr, adding annotations representing currently enabled + diagnostics. Do this either 'always', on 'fail', or 'never'. Specify 'help' + to explain the dump format and quit. + +.. option:: --dump-input-on-failure + + When the check fails, dump all of the original input. This option is + deprecated in favor of `--dump-input=fail`. + +.. option:: --enable-var-scope + + Enables scope for regex variables. + + Variables with names that start with ``$`` are considered global and + remain set throughout the file. + + All other variables get undefined after each encountered ``CHECK-LABEL``. + +.. option:: -D<VAR=VALUE> + + Sets a filecheck pattern variable ``VAR`` with value ``VALUE`` that can be + used in ``CHECK:`` lines. + +.. option:: -D#<NUMVAR>=<NUMERIC EXPRESSION> + + Sets a filecheck numeric variable ``NUMVAR`` to the result of evaluating + ``<NUMERIC EXPRESSION>`` that can be used in ``CHECK:`` lines. See section + ``FileCheck Numeric Variables and Expressions`` for details on supported + numeric expressions. + +.. option:: -version + + Show the version number of this program. + +.. option:: -v + + Print good directive pattern matches. However, if ``-input-dump=fail`` or + ``-input-dump=always``, add those matches as input annotations instead. + +.. option:: -vv + + Print information helpful in diagnosing internal FileCheck issues, such as + discarded overlapping ``CHECK-DAG:`` matches, implicit EOF pattern matches, + and ``CHECK-NOT:`` patterns that do not have matches. Implies ``-v``. + However, if ``-input-dump=fail`` or ``-input-dump=always``, just add that + information as input annotations instead. + +.. option:: --allow-deprecated-dag-overlap + + Enable overlapping among matches in a group of consecutive ``CHECK-DAG:`` + directives. This option is deprecated and is only provided for convenience + as old tests are migrated to the new non-overlapping ``CHECK-DAG:`` + implementation. + +.. option:: --color + + Use colors in output (autodetected by default). 
+ +EXIT STATUS +----------- + +If :program:`FileCheck` verifies that the file matches the expected contents, +it exits with 0. Otherwise, if not, or if an error occurs, it will exit with a +non-zero value. + +TUTORIAL +-------- + +FileCheck is typically used from LLVM regression tests, being invoked on the RUN +line of the test. A simple example of using FileCheck from a RUN line looks +like this: + +.. code-block:: llvm + + ; RUN: llvm-as < %s | llc -march=x86-64 | FileCheck %s + +This syntax says to pipe the current file ("``%s``") into ``llvm-as``, pipe +that into ``llc``, then pipe the output of ``llc`` into ``FileCheck``. This +means that FileCheck will be verifying its standard input (the llc output) +against the filename argument specified (the original ``.ll`` file specified by +"``%s``"). To see how this works, let's look at the rest of the ``.ll`` file +(after the RUN line): + +.. code-block:: llvm + + define void @sub1(i32* %p, i32 %v) { + entry: + ; CHECK: sub1: + ; CHECK: subl + %0 = tail call i32 @llvm.atomic.load.sub.i32.p0i32(i32* %p, i32 %v) + ret void + } + + define void @inc4(i64* %p) { + entry: + ; CHECK: inc4: + ; CHECK: incq + %0 = tail call i64 @llvm.atomic.load.add.i64.p0i64(i64* %p, i64 1) + ret void + } + +Here you can see some "``CHECK:``" lines specified in comments. Now you can +see how the file is piped into ``llvm-as``, then ``llc``, and the machine code +output is what we are verifying. FileCheck checks the machine code output to +verify that it matches what the "``CHECK:``" lines specify. + +The syntax of the "``CHECK:``" lines is very simple: they are fixed strings that +must occur in order. FileCheck defaults to ignoring horizontal whitespace +differences (e.g. a space is allowed to match a tab) but otherwise, the contents +of the "``CHECK:``" line is required to match some thing in the test file exactly. + +One nice thing about FileCheck (compared to grep) is that it allows merging +test cases together into logical groups. 
For example, because the test above +is checking for the "``sub1:``" and "``inc4:``" labels, it will not match +unless there is a "``subl``" in between those labels. If it existed somewhere +else in the file, that would not count: "``grep subl``" matches if "``subl``" +exists anywhere in the file. + +The FileCheck -check-prefix option +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The FileCheck `-check-prefix` option allows multiple test +configurations to be driven from one `.ll` file. This is useful in many +circumstances, for example, testing different architectural variants with +:program:`llc`. Here's a simple example: + +.. code-block:: llvm + + ; RUN: llvm-as < %s | llc -mtriple=i686-apple-darwin9 -mattr=sse41 \ + ; RUN: | FileCheck %s -check-prefix=X32 + ; RUN: llvm-as < %s | llc -mtriple=x86_64-apple-darwin9 -mattr=sse41 \ + ; RUN: | FileCheck %s -check-prefix=X64 + + define <4 x i32> @pinsrd_1(i32 %s, <4 x i32> %tmp) nounwind { + %tmp1 = insertelement <4 x i32> %tmp, i32 %s, i32 1 + ret <4 x i32> %tmp1 + ; X32: pinsrd_1: + ; X32: pinsrd $1, 4(%esp), %xmm0 + + ; X64: pinsrd_1: + ; X64: pinsrd $1, %edi, %xmm0 + } + +In this case, we're testing that we get the expected code generation with +both 32-bit and 64-bit code generation. + +The "CHECK-NEXT:" directive +~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Sometimes you want to match lines and would like to verify that matches +happen on exactly consecutive lines with no other lines in between them. In +this case, you can use "``CHECK:``" and "``CHECK-NEXT:``" directives to specify +this. If you specified a custom check prefix, just use "``<PREFIX>-NEXT:``". +For example, something like this works as you'd expect: + +.. 
code-block:: llvm + + define void @t2(<2 x double>* %r, <2 x double>* %A, double %B) { + %tmp3 = load <2 x double>* %A, align 16 + %tmp7 = insertelement <2 x double> undef, double %B, i32 0 + %tmp9 = shufflevector <2 x double> %tmp3, + <2 x double> %tmp7, + <2 x i32> < i32 0, i32 2 > + store <2 x double> %tmp9, <2 x double>* %r, align 16 + ret void + + ; CHECK: t2: + ; CHECK: movl 8(%esp), %eax + ; CHECK-NEXT: movapd (%eax), %xmm0 + ; CHECK-NEXT: movhpd 12(%esp), %xmm0 + ; CHECK-NEXT: movl 4(%esp), %eax + ; CHECK-NEXT: movapd %xmm0, (%eax) + ; CHECK-NEXT: ret + } + +"``CHECK-NEXT:``" directives reject the input unless there is exactly one +newline between it and the previous directive. A "``CHECK-NEXT:``" cannot be +the first directive in a file. + +The "CHECK-SAME:" directive +~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Sometimes you want to match lines and would like to verify that matches happen +on the same line as the previous match. In this case, you can use "``CHECK:``" +and "``CHECK-SAME:``" directives to specify this. If you specified a custom +check prefix, just use "``<PREFIX>-SAME:``". + +"``CHECK-SAME:``" is particularly powerful in conjunction with "``CHECK-NOT:``" +(described below). + +For example, the following works like you'd expect: + +.. code-block:: llvm + + !0 = !DILocation(line: 5, scope: !1, inlinedAt: !2) + + ; CHECK: !DILocation(line: 5, + ; CHECK-NOT: column: + ; CHECK-SAME: scope: ![[SCOPE:[0-9]+]] + +"``CHECK-SAME:``" directives reject the input if there are any newlines between +it and the previous directive. A "``CHECK-SAME:``" cannot be the first +directive in a file. + +The "CHECK-EMPTY:" directive +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +If you need to check that the next line has nothing on it, not even whitespace, +you can use the "``CHECK-EMPTY:``" directive. + +.. 
code-block:: llvm + + declare void @foo() + + declare void @bar() + ; CHECK: foo + ; CHECK-EMPTY: + ; CHECK-NEXT: bar + +Just like "``CHECK-NEXT:``" the directive will fail if there is more than one +newline before it finds the next blank line, and it cannot be the first +directive in a file. + +The "CHECK-NOT:" directive +~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The "``CHECK-NOT:``" directive is used to verify that a string doesn't occur +between two matches (or before the first match, or after the last match). For +example, to verify that a load is removed by a transformation, a test like this +can be used: + +.. code-block:: llvm + + define i8 @coerce_offset0(i32 %V, i32* %P) { + store i32 %V, i32* %P + + %P2 = bitcast i32* %P to i8* + %P3 = getelementptr i8* %P2, i32 2 + + %A = load i8* %P3 + ret i8 %A + ; CHECK: @coerce_offset0 + ; CHECK-NOT: load + ; CHECK: ret i8 + } + +The "CHECK-COUNT:" directive +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +If you need to match multiple lines with the same pattern over and over again +you can repeat a plain ``CHECK:`` as many times as needed. If that looks too +boring you can instead use a counted check "``CHECK-COUNT-<num>:``", where +``<num>`` is a positive decimal number. It will match the pattern exactly +``<num>`` times, no more and no less. If you specified a custom check prefix, +just use "``<PREFIX>-COUNT-<num>:``" for the same effect. +Here is a simple example: + +.. code-block:: text + + Loop at depth 1 + Loop at depth 1 + Loop at depth 1 + Loop at depth 1 + Loop at depth 2 + Loop at depth 3 + + ; CHECK-COUNT-6: Loop at depth {{[0-9]+}} + ; CHECK-NOT: Loop at depth {{[0-9]+}} + +The "CHECK-DAG:" directive +~~~~~~~~~~~~~~~~~~~~~~~~~~ + +If it's necessary to match strings that don't occur in a strictly sequential +order, "``CHECK-DAG:``" could be used to verify them between two matches (or +before the first match, or after the last match). For example, clang emits +vtable globals in reverse order. 
Using ``CHECK-DAG:``, we can keep the checks +in the natural order: + +.. code-block:: c++ + + // RUN: %clang_cc1 %s -emit-llvm -o - | FileCheck %s + + struct Foo { virtual void method(); }; + Foo f; // emit vtable + // CHECK-DAG: @_ZTV3Foo = + + struct Bar { virtual void method(); }; + Bar b; + // CHECK-DAG: @_ZTV3Bar = + +``CHECK-NOT:`` directives could be mixed with ``CHECK-DAG:`` directives to +exclude strings between the surrounding ``CHECK-DAG:`` directives. As a result, +the surrounding ``CHECK-DAG:`` directives cannot be reordered, i.e. all +occurrences matching ``CHECK-DAG:`` before ``CHECK-NOT:`` must not fall behind +occurrences matching ``CHECK-DAG:`` after ``CHECK-NOT:``. For example, + +.. code-block:: llvm + + ; CHECK-DAG: BEFORE + ; CHECK-NOT: NOT + ; CHECK-DAG: AFTER + +This case will reject input strings where ``BEFORE`` occurs after ``AFTER``. + +With captured variables, ``CHECK-DAG:`` is able to match valid topological +orderings of a DAG with edges from the definition of a variable to its use. +It's useful, e.g., when your test cases need to match different output +sequences from the instruction scheduler. For example, + +.. code-block:: llvm + + ; CHECK-DAG: add [[REG1:r[0-9]+]], r1, r2 + ; CHECK-DAG: add [[REG2:r[0-9]+]], r3, r4 + ; CHECK: mul r5, [[REG1]], [[REG2]] + +In this case, any order of that two ``add`` instructions will be allowed. + +If you are defining `and` using variables in the same ``CHECK-DAG:`` block, +be aware that the definition rule can match `after` its use. + +So, for instance, the code below will pass: + +.. code-block:: text + + ; CHECK-DAG: vmov.32 [[REG2:d[0-9]+]][0] + ; CHECK-DAG: vmov.32 [[REG2]][1] + vmov.32 d0[1] + vmov.32 d0[0] + +While this other code, will not: + +.. 
code-block:: text + + ; CHECK-DAG: vmov.32 [[REG2:d[0-9]+]][0] + ; CHECK-DAG: vmov.32 [[REG2]][1] + vmov.32 d1[1] + vmov.32 d0[0] + +While this can be very useful, it's also dangerous, because in the case of +register sequence, you must have a strong order (read before write, copy before +use, etc). If the definition your test is looking for doesn't match (because +of a bug in the compiler), it may match further away from the use, and mask +real bugs away. + +In those cases, to enforce the order, use a non-DAG directive between DAG-blocks. + +A ``CHECK-DAG:`` directive skips matches that overlap the matches of any +preceding ``CHECK-DAG:`` directives in the same ``CHECK-DAG:`` block. Not only +is this non-overlapping behavior consistent with other directives, but it's +also necessary to handle sets of non-unique strings or patterns. For example, +the following directives look for unordered log entries for two tasks in a +parallel program, such as the OpenMP runtime: + +.. code-block:: text + + // CHECK-DAG: [[THREAD_ID:[0-9]+]]: task_begin + // CHECK-DAG: [[THREAD_ID]]: task_end + // + // CHECK-DAG: [[THREAD_ID:[0-9]+]]: task_begin + // CHECK-DAG: [[THREAD_ID]]: task_end + +The second pair of directives is guaranteed not to match the same log entries +as the first pair even though the patterns are identical and even if the text +of the log entries is identical because the thread ID manages to be reused. + +The "CHECK-LABEL:" directive +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Sometimes in a file containing multiple tests divided into logical blocks, one +or more ``CHECK:`` directives may inadvertently succeed by matching lines in a +later block. While an error will usually eventually be generated, the check +flagged as causing the error may not actually bear any relationship to the +actual source of the problem. + +In order to produce better error messages in these cases, the "``CHECK-LABEL:``" +directive can be used. 
It is treated identically to a normal ``CHECK`` +directive except that FileCheck makes an additional assumption that a line +matched by the directive cannot also be matched by any other check present in +``match-filename``; this is intended to be used for lines containing labels or +other unique identifiers. Conceptually, the presence of ``CHECK-LABEL`` divides +the input stream into separate blocks, each of which is processed independently, +preventing a ``CHECK:`` directive in one block matching a line in another block. +If ``--enable-var-scope`` is in effect, all local variables are cleared at the +beginning of the block. + +For example, + +.. code-block:: llvm + + define %struct.C* @C_ctor_base(%struct.C* %this, i32 %x) { + entry: + ; CHECK-LABEL: C_ctor_base: + ; CHECK: mov [[SAVETHIS:r[0-9]+]], r0 + ; CHECK: bl A_ctor_base + ; CHECK: mov r0, [[SAVETHIS]] + %0 = bitcast %struct.C* %this to %struct.A* + %call = tail call %struct.A* @A_ctor_base(%struct.A* %0) + %1 = bitcast %struct.C* %this to %struct.B* + %call2 = tail call %struct.B* @B_ctor_base(%struct.B* %1, i32 %x) + ret %struct.C* %this + } + + define %struct.D* @D_ctor_base(%struct.D* %this, i32 %x) { + entry: + ; CHECK-LABEL: D_ctor_base: + +The use of ``CHECK-LABEL:`` directives in this case ensures that the three +``CHECK:`` directives only accept lines corresponding to the body of the +``@C_ctor_base`` function, even if the patterns match lines found later in +the file. Furthermore, if one of these three ``CHECK:`` directives fail, +FileCheck will recover by continuing to the next block, allowing multiple test +failures to be detected in a single invocation. + +There is no requirement that ``CHECK-LABEL:`` directives contain strings that +correspond to actual syntactic labels in a source or output language: they must +simply uniquely match a single line in the file being verified. + +``CHECK-LABEL:`` directives cannot contain variable definitions or uses. 
+ +FileCheck Regex Matching Syntax +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +All FileCheck directives take a pattern to match. +For most uses of FileCheck, fixed string matching is perfectly sufficient. For +some things, a more flexible form of matching is desired. To support this, +FileCheck allows you to specify regular expressions in matching strings, +surrounded by double braces: ``{{yourregex}}``. FileCheck implements a POSIX +regular expression matcher; it supports Extended POSIX regular expressions +(ERE). Because we want to use fixed string matching for a majority of what we +do, FileCheck has been designed to support mixing and matching fixed string +matching with regular expressions. This allows you to write things like this: + +.. code-block:: llvm + + ; CHECK: movhpd {{[0-9]+}}(%esp), {{%xmm[0-7]}} + +In this case, any offset from the ESP register will be allowed, and any xmm +register will be allowed. + +Because regular expressions are enclosed with double braces, they are +visually distinct, and you don't need to use escape characters within the double +braces like you would in C. In the rare case that you want to match double +braces explicitly from the input, you can use something ugly like +``{{[}][}]}}`` as your pattern. Or if you are using the repetition count +syntax, for example ``[[:xdigit:]]{8}`` to match exactly 8 hex digits, you +would need to add parentheses like this ``{{([[:xdigit:]]{8})}}`` to avoid +confusion with FileCheck's closing double-brace. + +FileCheck String Substitution Blocks +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +It is often useful to match a pattern and then verify that it occurs again +later in the file. For codegen tests, this can be useful to allow any +register, but verify that that register is used consistently later. To do +this, :program:`FileCheck` supports string substitution blocks that allow +string variables to be defined and substituted into patterns. Here is a simple +example: + +.. 
code-block:: llvm + + ; CHECK: test5: + ; CHECK: notw [[REGISTER:%[a-z]+]] + ; CHECK: andw {{.*}}[[REGISTER]] + +The first check line matches a regex ``%[a-z]+`` and captures it into the +string variable ``REGISTER``. The second line verifies that whatever is in +``REGISTER`` occurs later in the file after an "``andw``". :program:`FileCheck` +string substitution blocks are always contained in ``[[ ]]`` pairs, and string +variable names can be formed with the regex ``[a-zA-Z_][a-zA-Z0-9_]*``. If a +colon follows the name, then it is a definition of the variable; otherwise, it +is a substitution. + +:program:`FileCheck` variables can be defined multiple times, and substitutions +always get the latest value. Variables can also be substituted later on the +same line they were defined on. For example: + +.. code-block:: llvm + + ; CHECK: op [[REG:r[0-9]+]], [[REG]] + +Can be useful if you want the operands of ``op`` to be the same register, +and don't care exactly which register it is. + +If ``--enable-var-scope`` is in effect, variables with names that +start with ``$`` are considered to be global. All other variables are +local. All local variables get undefined at the beginning of each +CHECK-LABEL block. Global variables are not affected by CHECK-LABEL. +This makes it easier to ensure that individual tests are not affected +by variables set in preceding tests. + +FileCheck Numeric Substitution Blocks +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +:program:`FileCheck` also supports numeric substitution blocks that allow +defining numeric variables and checking for numeric values that satisfy a +numeric expression constraint based on those variables via a numeric +substitution. This allows ``CHECK:`` directives to verify a numeric relation +between two numbers, such as the need for consecutive registers to be used. + +The syntax to define a numeric variable is ``[[#<NUMVAR>:]]`` where +``<NUMVAR>`` is the name of the numeric variable to define to the matching +value. 
+ +For example: + +.. code-block:: llvm + + ; CHECK: mov r[[#REG:]], 42 + +would match ``mov r5, 42`` and set ``REG`` to the value ``5``. + +The syntax of a numeric substitution is ``[[#<expr>]]`` where ``<expr>`` is an +expression. An expression is recursively defined as: + +* a numeric operand, or +* an expression followed by an operator and a numeric operand. + +A numeric operand is a previously defined numeric variable, or an integer +literal. The supported operators are ``+`` and ``-``. Spaces are accepted +before, after and between any of these elements. + +For example: + +.. code-block:: llvm + + ; CHECK: load r[[#REG:]], [r0] + ; CHECK: load r[[#REG+1]], [r1] + +The above example would match the text: + +.. code-block:: gas + + load r5, [r0] + load r6, [r1] + +but would not match the text: + +.. code-block:: gas + + load r5, [r0] + load r7, [r1] + +due to ``7`` being unequal to ``5 + 1``. + +The syntax also supports an empty expression, equivalent to writing ``{{[0-9]+}}``, +for cases where the input must contain a numeric value but the value itself +does not matter: + +.. code-block:: gas + + ; CHECK-NOT: mov r0, r[[#]] + +to check that a value is synthesized rather than moved around. + +A numeric variable can also be defined to the result of a numeric expression, +in which case the numeric expression is checked and if verified the variable is +assigned to the value. The unified syntax for both defining numeric variables +and checking a numeric expression is thus ``[[#<NUMVAR>: <expr>]]`` with each +element as described previously. + +The ``--enable-var-scope`` option has the same effect on numeric variables as +on string variables. + +Important note: In its current implementation, an expression cannot use a +numeric variable defined earlier in the same CHECK directive. + +FileCheck Pseudo Numeric Variables +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Sometimes there's a need to verify output that contains line numbers of the +match file, e.g. when testing compiler diagnostics. 
This introduces a certain +fragility of the match file structure, as "``CHECK:``" lines contain absolute +line numbers in the same file, which have to be updated whenever line numbers +change due to text addition or deletion. + +To support this case, FileCheck expressions understand the ``@LINE`` pseudo +numeric variable which evaluates to the line number of the CHECK pattern where +it is found. + +This way match patterns can be put near the relevant test lines and include +relative line number references, for example: + +.. code-block:: c++ + + // CHECK: test.cpp:[[# @LINE + 4]]:6: error: expected ';' after top level declarator + // CHECK-NEXT: {{^int a}} + // CHECK-NEXT: {{^ \^}} + // CHECK-NEXT: {{^ ;}} + int a + +To support legacy uses of ``@LINE`` as a special string variable, +:program:`FileCheck` also accepts the following uses of ``@LINE`` with string +substitution block syntax: ``[[@LINE]]``, ``[[@LINE+<offset>]]`` and +``[[@LINE-<offset>]]`` without any spaces inside the brackets and where +``offset`` is an integer. + +Matching Newline Characters +~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +To match newline characters in regular expressions the character class +``[[:space:]]`` can be used. For example, the following pattern: + +.. code-block:: c++ + + // CHECK: DW_AT_location [DW_FORM_sec_offset] ([[DLOC:0x[0-9a-f]+]]){{[[:space:]].*}}"intd" + +matches output of the form (from llvm-dwarfdump): + +.. code-block:: text + + DW_AT_location [DW_FORM_sec_offset] (0x00000233) + DW_AT_name [DW_FORM_strp] ( .debug_str[0x000000c9] = "intd") + +letting us set the :program:`FileCheck` variable ``DLOC`` to the desired value +``0x00000233``, extracted from the line immediately preceding "``intd``". 
diff --git a/include/llvm/Support/FileCheck.h b/include/llvm/Support/FileCheck.h index dc6dc7496af..5c6585ed76f 100644 --- a/include/llvm/Support/FileCheck.h +++ b/include/llvm/Support/FileCheck.h @@ -1,181 +1,180 @@ -//==-- llvm/Support/FileCheck.h ---------------------------*- C++ -*-==// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -/// \file This file has some utilities to use FileCheck as an API -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_SUPPORT_FILECHECK_H -#define LLVM_SUPPORT_FILECHECK_H - -#include "llvm/ADT/StringRef.h" -#include "llvm/Support/MemoryBuffer.h" -#include "llvm/Support/Regex.h" -#include "llvm/Support/SourceMgr.h" -#include -#include - -namespace llvm { - -/// Contains info about various FileCheck options. -struct FileCheckRequest { - std::vector CheckPrefixes; - bool NoCanonicalizeWhiteSpace = false; - std::vector ImplicitCheckNot; - std::vector GlobalDefines; - bool AllowEmptyInput = false; - bool MatchFullLines = false; - bool IgnoreCase = false; - bool EnableVarScope = false; - bool AllowDeprecatedDagOverlap = false; - bool Verbose = false; - bool VerboseVerbose = false; -}; - -//===----------------------------------------------------------------------===// -// Summary of a FileCheck diagnostic. -//===----------------------------------------------------------------------===// - -namespace Check { - -enum FileCheckKind { - CheckNone = 0, - CheckPlain, - CheckNext, - CheckSame, - CheckNot, - CheckDAG, - CheckLabel, - CheckEmpty, - - /// Indicates the pattern only matches the end of file. This is used for - /// trailing CHECK-NOTs. - CheckEOF, - - /// Marks when parsing found a -NOT check combined with another CHECK suffix. 
- CheckBadNot, - - /// Marks when parsing found a -COUNT directive with invalid count value. - CheckBadCount -}; - -class FileCheckType { - FileCheckKind Kind; - int Count; ///< optional Count for some checks - -public: - FileCheckType(FileCheckKind Kind = CheckNone) : Kind(Kind), Count(1) {} - FileCheckType(const FileCheckType &) = default; - - operator FileCheckKind() const { return Kind; } - - int getCount() const { return Count; } - FileCheckType &setCount(int C); - - // \returns a description of \p Prefix. - std::string getDescription(StringRef Prefix) const; -}; -} // namespace Check - -struct FileCheckDiag { - /// What is the FileCheck directive for this diagnostic? - Check::FileCheckType CheckTy; - /// Where is the FileCheck directive for this diagnostic? - unsigned CheckLine, CheckCol; - /// What type of match result does this diagnostic describe? - /// - /// A directive's supplied pattern is said to be either expected or excluded - /// depending on whether the pattern must have or must not have a match in - /// order for the directive to succeed. For example, a CHECK directive's - /// pattern is expected, and a CHECK-NOT directive's pattern is excluded. - /// All match result types whose names end with "Excluded" are for excluded - /// patterns, and all others are for expected patterns. - /// - /// There might be more than one match result for a single pattern. For - /// example, there might be several discarded matches - /// (MatchFoundButDiscarded) before either a good match - /// (MatchFoundAndExpected) or a failure to match (MatchNoneButExpected), - /// and there might be a fuzzy match (MatchFuzzy) after the latter. - enum MatchType { - /// Indicates a good match for an expected pattern. - MatchFoundAndExpected, - /// Indicates a match for an excluded pattern. - MatchFoundButExcluded, - /// Indicates a match for an expected pattern, but the match is on the - /// wrong line. 
- MatchFoundButWrongLine, - /// Indicates a discarded match for an expected pattern. - MatchFoundButDiscarded, - /// Indicates no match for an excluded pattern. - MatchNoneAndExcluded, - /// Indicates no match for an expected pattern, but this might follow good - /// matches when multiple matches are expected for the pattern, or it might - /// follow discarded matches for the pattern. - MatchNoneButExpected, - /// Indicates a fuzzy match that serves as a suggestion for the next - /// intended match for an expected pattern with too few or no good matches. - MatchFuzzy, - } MatchTy; - /// The search range if MatchTy is MatchNoneAndExcluded or - /// MatchNoneButExpected, or the match range otherwise. - unsigned InputStartLine; - unsigned InputStartCol; - unsigned InputEndLine; - unsigned InputEndCol; - FileCheckDiag(const SourceMgr &SM, const Check::FileCheckType &CheckTy, - SMLoc CheckLoc, MatchType MatchTy, SMRange InputRange); -}; - -class FileCheckPatternContext; -struct FileCheckString; - -/// FileCheck class takes the request and exposes various methods that -/// use information from the request. -class FileCheck { - FileCheckRequest Req; - std::unique_ptr PatternContext; - // C++17 TODO: make this a plain std::vector. - std::unique_ptr> CheckStrings; - -public: - explicit FileCheck(FileCheckRequest Req); - ~FileCheck(); - - // Combines the check prefixes into a single regex so that we can efficiently - // scan for any of the set. - // - // The semantics are that the longest-match wins which matches our regex - // library. - Regex buildCheckPrefixRegex(); - - /// Reads the check file from \p Buffer and records the expected strings it - /// contains. Errors are reported against \p SM. - /// - /// Only expected strings whose prefix is one of those listed in \p PrefixRE - /// are recorded. \returns true in case of an error, false otherwise. 
- bool readCheckFile(SourceMgr &SM, StringRef Buffer, Regex &PrefixRE); - - bool ValidateCheckPrefixes(); - - /// Canonicalizes whitespaces in the file. Line endings are replaced with - /// UNIX-style '\n'. - StringRef CanonicalizeFile(MemoryBuffer &MB, - SmallVectorImpl &OutputBuffer); - - /// Checks the input to FileCheck provided in the \p Buffer against the - /// expected strings read from the check file and record diagnostics emitted - /// in \p Diags. Errors are recorded against \p SM. - /// - /// \returns false if the input fails to satisfy the checks. - bool checkInput(SourceMgr &SM, StringRef Buffer, - std::vector *Diags = nullptr); -}; - -} // namespace llvm - -#endif +//==-- llvm/Support/FileCheck.h ---------------------------*- C++ -*-==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +/// \file This file has some utilities to use FileCheck as an API +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_SUPPORT_FILECHECK_H +#define LLVM_SUPPORT_FILECHECK_H + +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/Regex.h" +#include "llvm/Support/SourceMgr.h" +#include +#include + +namespace llvm { + +/// Contains info about various FileCheck options. +struct FileCheckRequest { + std::vector CheckPrefixes; + bool NoCanonicalizeWhiteSpace = false; + std::vector ImplicitCheckNot; + std::vector GlobalDefines; + bool AllowEmptyInput = false; + bool MatchFullLines = false; + bool EnableVarScope = false; + bool AllowDeprecatedDagOverlap = false; + bool Verbose = false; + bool VerboseVerbose = false; +}; + +//===----------------------------------------------------------------------===// +// Summary of a FileCheck diagnostic. 
+//===----------------------------------------------------------------------===// + +namespace Check { + +enum FileCheckKind { + CheckNone = 0, + CheckPlain, + CheckNext, + CheckSame, + CheckNot, + CheckDAG, + CheckLabel, + CheckEmpty, + + /// Indicates the pattern only matches the end of file. This is used for + /// trailing CHECK-NOTs. + CheckEOF, + + /// Marks when parsing found a -NOT check combined with another CHECK suffix. + CheckBadNot, + + /// Marks when parsing found a -COUNT directive with invalid count value. + CheckBadCount +}; + +class FileCheckType { + FileCheckKind Kind; + int Count; ///< optional Count for some checks + +public: + FileCheckType(FileCheckKind Kind = CheckNone) : Kind(Kind), Count(1) {} + FileCheckType(const FileCheckType &) = default; + + operator FileCheckKind() const { return Kind; } + + int getCount() const { return Count; } + FileCheckType &setCount(int C); + + // \returns a description of \p Prefix. + std::string getDescription(StringRef Prefix) const; +}; +} // namespace Check + +struct FileCheckDiag { + /// What is the FileCheck directive for this diagnostic? + Check::FileCheckType CheckTy; + /// Where is the FileCheck directive for this diagnostic? + unsigned CheckLine, CheckCol; + /// What type of match result does this diagnostic describe? + /// + /// A directive's supplied pattern is said to be either expected or excluded + /// depending on whether the pattern must have or must not have a match in + /// order for the directive to succeed. For example, a CHECK directive's + /// pattern is expected, and a CHECK-NOT directive's pattern is excluded. + /// All match result types whose names end with "Excluded" are for excluded + /// patterns, and all others are for expected patterns. + /// + /// There might be more than one match result for a single pattern. 
For + /// example, there might be several discarded matches + /// (MatchFoundButDiscarded) before either a good match + /// (MatchFoundAndExpected) or a failure to match (MatchNoneButExpected), + /// and there might be a fuzzy match (MatchFuzzy) after the latter. + enum MatchType { + /// Indicates a good match for an expected pattern. + MatchFoundAndExpected, + /// Indicates a match for an excluded pattern. + MatchFoundButExcluded, + /// Indicates a match for an expected pattern, but the match is on the + /// wrong line. + MatchFoundButWrongLine, + /// Indicates a discarded match for an expected pattern. + MatchFoundButDiscarded, + /// Indicates no match for an excluded pattern. + MatchNoneAndExcluded, + /// Indicates no match for an expected pattern, but this might follow good + /// matches when multiple matches are expected for the pattern, or it might + /// follow discarded matches for the pattern. + MatchNoneButExpected, + /// Indicates a fuzzy match that serves as a suggestion for the next + /// intended match for an expected pattern with too few or no good matches. + MatchFuzzy, + } MatchTy; + /// The search range if MatchTy is MatchNoneAndExcluded or + /// MatchNoneButExpected, or the match range otherwise. + unsigned InputStartLine; + unsigned InputStartCol; + unsigned InputEndLine; + unsigned InputEndCol; + FileCheckDiag(const SourceMgr &SM, const Check::FileCheckType &CheckTy, + SMLoc CheckLoc, MatchType MatchTy, SMRange InputRange); +}; + +class FileCheckPatternContext; +struct FileCheckString; + +/// FileCheck class takes the request and exposes various methods that +/// use information from the request. +class FileCheck { + FileCheckRequest Req; + std::unique_ptr PatternContext; + // C++17 TODO: make this a plain std::vector. + std::unique_ptr> CheckStrings; + +public: + explicit FileCheck(FileCheckRequest Req); + ~FileCheck(); + + // Combines the check prefixes into a single regex so that we can efficiently + // scan for any of the set. 
+ // + // The semantics are that the longest-match wins which matches our regex + // library. + Regex buildCheckPrefixRegex(); + + /// Reads the check file from \p Buffer and records the expected strings it + /// contains. Errors are reported against \p SM. + /// + /// Only expected strings whose prefix is one of those listed in \p PrefixRE + /// are recorded. \returns true in case of an error, false otherwise. + bool readCheckFile(SourceMgr &SM, StringRef Buffer, Regex &PrefixRE); + + bool ValidateCheckPrefixes(); + + /// Canonicalizes whitespaces in the file. Line endings are replaced with + /// UNIX-style '\n'. + StringRef CanonicalizeFile(MemoryBuffer &MB, + SmallVectorImpl &OutputBuffer); + + /// Checks the input to FileCheck provided in the \p Buffer against the + /// expected strings read from the check file and record diagnostics emitted + /// in \p Diags. Errors are recorded against \p SM. + /// + /// \returns false if the input fails to satisfy the checks. + bool checkInput(SourceMgr &SM, StringRef Buffer, + std::vector *Diags = nullptr); +}; + +} // namespace llvm + +#endif diff --git a/lib/Support/FileCheck.cpp b/lib/Support/FileCheck.cpp index fb4bbba033b..c3f537b3524 100644 --- a/lib/Support/FileCheck.cpp +++ b/lib/Support/FileCheck.cpp @@ -1,1990 +1,1985 @@ -//===- FileCheck.cpp - Check that File's Contents match what is expected --===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// FileCheck does a line-by line check of a file that validates whether it -// contains the expected content. This is useful for regression tests etc. -// -// This file implements most of the API that will be used by the FileCheck utility -// as well as various unittests. 
-//===----------------------------------------------------------------------===// - -#include "llvm/Support/FileCheck.h" -#include "FileCheckImpl.h" -#include "llvm/ADT/StringSet.h" -#include "llvm/ADT/Twine.h" -#include "llvm/Support/FormatVariadic.h" -#include -#include -#include -#include - -using namespace llvm; - -Expected FileCheckNumericVariableUse::eval() const { - Optional Value = NumericVariable->getValue(); - if (Value) - return *Value; - - return make_error(Name); -} - -Expected FileCheckASTBinop::eval() const { - Expected LeftOp = LeftOperand->eval(); - Expected RightOp = RightOperand->eval(); - - // Bubble up any error (e.g. undefined variables) in the recursive - // evaluation. - if (!LeftOp || !RightOp) { - Error Err = Error::success(); - if (!LeftOp) - Err = joinErrors(std::move(Err), LeftOp.takeError()); - if (!RightOp) - Err = joinErrors(std::move(Err), RightOp.takeError()); - return std::move(Err); - } - - return EvalBinop(*LeftOp, *RightOp); -} - -Expected FileCheckNumericSubstitution::getResult() const { - Expected EvaluatedValue = ExpressionAST->eval(); - if (!EvaluatedValue) - return EvaluatedValue.takeError(); - return utostr(*EvaluatedValue); -} - -Expected FileCheckStringSubstitution::getResult() const { - // Look up the value and escape it so that we can put it into the regex. - Expected VarVal = Context->getPatternVarValue(FromStr); - if (!VarVal) - return VarVal.takeError(); - return Regex::escape(*VarVal); -} - -bool FileCheckPattern::isValidVarNameStart(char C) { - return C == '_' || isalpha(C); -} - -Expected -FileCheckPattern::parseVariable(StringRef &Str, const SourceMgr &SM) { - if (Str.empty()) - return FileCheckErrorDiagnostic::get(SM, Str, "empty variable name"); - - bool ParsedOneChar = false; - unsigned I = 0; - bool IsPseudo = Str[0] == '@'; - - // Global vars start with '$'. 
- if (Str[0] == '$' || IsPseudo) - ++I; - - for (unsigned E = Str.size(); I != E; ++I) { - if (!ParsedOneChar && !isValidVarNameStart(Str[I])) - return FileCheckErrorDiagnostic::get(SM, Str, "invalid variable name"); - - // Variable names are composed of alphanumeric characters and underscores. - if (Str[I] != '_' && !isalnum(Str[I])) - break; - ParsedOneChar = true; - } - - StringRef Name = Str.take_front(I); - Str = Str.substr(I); - return VariableProperties {Name, IsPseudo}; -} - -// StringRef holding all characters considered as horizontal whitespaces by -// FileCheck input canonicalization. -constexpr StringLiteral SpaceChars = " \t"; - -// Parsing helper function that strips the first character in S and returns it. -static char popFront(StringRef &S) { - char C = S.front(); - S = S.drop_front(); - return C; -} - -char FileCheckUndefVarError::ID = 0; -char FileCheckErrorDiagnostic::ID = 0; -char FileCheckNotFoundError::ID = 0; - -Expected -FileCheckPattern::parseNumericVariableDefinition( - StringRef &Expr, FileCheckPatternContext *Context, - Optional LineNumber, const SourceMgr &SM) { - Expected ParseVarResult = parseVariable(Expr, SM); - if (!ParseVarResult) - return ParseVarResult.takeError(); - StringRef Name = ParseVarResult->Name; - - if (ParseVarResult->IsPseudo) - return FileCheckErrorDiagnostic::get( - SM, Name, "definition of pseudo numeric variable unsupported"); - - // Detect collisions between string and numeric variables when the latter - // is created later than the former. 
- if (Context->DefinedVariableTable.find(Name) != - Context->DefinedVariableTable.end()) - return FileCheckErrorDiagnostic::get( - SM, Name, "string variable with name '" + Name + "' already exists"); - - Expr = Expr.ltrim(SpaceChars); - if (!Expr.empty()) - return FileCheckErrorDiagnostic::get( - SM, Expr, "unexpected characters after numeric variable name"); - - FileCheckNumericVariable *DefinedNumericVariable; - auto VarTableIter = Context->GlobalNumericVariableTable.find(Name); - if (VarTableIter != Context->GlobalNumericVariableTable.end()) - DefinedNumericVariable = VarTableIter->second; - else - DefinedNumericVariable = Context->makeNumericVariable(Name, LineNumber); - - return DefinedNumericVariable; -} - -Expected> -FileCheckPattern::parseNumericVariableUse(StringRef Name, bool IsPseudo, - Optional LineNumber, - FileCheckPatternContext *Context, - const SourceMgr &SM) { - if (IsPseudo && !Name.equals("@LINE")) - return FileCheckErrorDiagnostic::get( - SM, Name, "invalid pseudo numeric variable '" + Name + "'"); - - // Numeric variable definitions and uses are parsed in the order in which - // they appear in the CHECK patterns. For each definition, the pointer to the - // class instance of the corresponding numeric variable definition is stored - // in GlobalNumericVariableTable in parsePattern. Therefore, if the pointer - // we get below is null, it means no such variable was defined before. When - // that happens, we create a dummy variable so that parsing can continue. All - // uses of undefined variables, whether string or numeric, are then diagnosed - // in printSubstitutions() after failing to match. 
- auto VarTableIter = Context->GlobalNumericVariableTable.find(Name); - FileCheckNumericVariable *NumericVariable; - if (VarTableIter != Context->GlobalNumericVariableTable.end()) - NumericVariable = VarTableIter->second; - else { - NumericVariable = Context->makeNumericVariable(Name); - Context->GlobalNumericVariableTable[Name] = NumericVariable; - } - - Optional DefLineNumber = NumericVariable->getDefLineNumber(); - if (DefLineNumber && LineNumber && *DefLineNumber == *LineNumber) - return FileCheckErrorDiagnostic::get( - SM, Name, - "numeric variable '" + Name + - "' defined earlier in the same CHECK directive"); - - return std::make_unique(Name, NumericVariable); -} - -Expected> -FileCheckPattern::parseNumericOperand(StringRef &Expr, AllowedOperand AO, - Optional LineNumber, - FileCheckPatternContext *Context, - const SourceMgr &SM) { - if (AO == AllowedOperand::LineVar || AO == AllowedOperand::Any) { - // Try to parse as a numeric variable use. - Expected ParseVarResult = - parseVariable(Expr, SM); - if (ParseVarResult) - return parseNumericVariableUse(ParseVarResult->Name, - ParseVarResult->IsPseudo, LineNumber, - Context, SM); - if (AO == AllowedOperand::LineVar) - return ParseVarResult.takeError(); - // Ignore the error and retry parsing as a literal. - consumeError(ParseVarResult.takeError()); - } - - // Otherwise, parse it as a literal. 
- uint64_t LiteralValue; - if (!Expr.consumeInteger(/*Radix=*/10, LiteralValue)) - return std::make_unique(LiteralValue); - - return FileCheckErrorDiagnostic::get(SM, Expr, - "invalid operand format '" + Expr + "'"); -} - -static uint64_t add(uint64_t LeftOp, uint64_t RightOp) { - return LeftOp + RightOp; -} - -static uint64_t sub(uint64_t LeftOp, uint64_t RightOp) { - return LeftOp - RightOp; -} - -Expected> FileCheckPattern::parseBinop( - StringRef &Expr, std::unique_ptr LeftOp, - bool IsLegacyLineExpr, Optional LineNumber, - FileCheckPatternContext *Context, const SourceMgr &SM) { - Expr = Expr.ltrim(SpaceChars); - if (Expr.empty()) - return std::move(LeftOp); - - // Check if this is a supported operation and select a function to perform - // it. - SMLoc OpLoc = SMLoc::getFromPointer(Expr.data()); - char Operator = popFront(Expr); - binop_eval_t EvalBinop; - switch (Operator) { - case '+': - EvalBinop = add; - break; - case '-': - EvalBinop = sub; - break; - default: - return FileCheckErrorDiagnostic::get( - SM, OpLoc, Twine("unsupported operation '") + Twine(Operator) + "'"); - } - - // Parse right operand. - Expr = Expr.ltrim(SpaceChars); - if (Expr.empty()) - return FileCheckErrorDiagnostic::get(SM, Expr, - "missing operand in expression"); - // The second operand in a legacy @LINE expression is always a literal. - AllowedOperand AO = - IsLegacyLineExpr ? 
AllowedOperand::Literal : AllowedOperand::Any; - Expected> RightOpResult = - parseNumericOperand(Expr, AO, LineNumber, Context, SM); - if (!RightOpResult) - return RightOpResult; - - Expr = Expr.ltrim(SpaceChars); - return std::make_unique(EvalBinop, std::move(LeftOp), - std::move(*RightOpResult)); -} - -Expected> -FileCheckPattern::parseNumericSubstitutionBlock( - StringRef Expr, - Optional &DefinedNumericVariable, - bool IsLegacyLineExpr, Optional LineNumber, - FileCheckPatternContext *Context, const SourceMgr &SM) { - std::unique_ptr ExpressionAST = nullptr; - StringRef DefExpr = StringRef(); - DefinedNumericVariable = None; - // Save variable definition expression if any. - size_t DefEnd = Expr.find(':'); - if (DefEnd != StringRef::npos) { - DefExpr = Expr.substr(0, DefEnd); - Expr = Expr.substr(DefEnd + 1); - } - - // Parse the expression itself. - Expr = Expr.ltrim(SpaceChars); - if (!Expr.empty()) { - // The first operand in a legacy @LINE expression is always the @LINE - // pseudo variable. - AllowedOperand AO = - IsLegacyLineExpr ? AllowedOperand::LineVar : AllowedOperand::Any; - Expected> ParseResult = - parseNumericOperand(Expr, AO, LineNumber, Context, SM); - while (ParseResult && !Expr.empty()) { - ParseResult = parseBinop(Expr, std::move(*ParseResult), IsLegacyLineExpr, - LineNumber, Context, SM); - // Legacy @LINE expressions only allow 2 operands. - if (ParseResult && IsLegacyLineExpr && !Expr.empty()) - return FileCheckErrorDiagnostic::get( - SM, Expr, - "unexpected characters at end of expression '" + Expr + "'"); - } - if (!ParseResult) - return ParseResult; - ExpressionAST = std::move(*ParseResult); - } - - // Parse the numeric variable definition. 
- if (DefEnd != StringRef::npos) { - DefExpr = DefExpr.ltrim(SpaceChars); - Expected ParseResult = - parseNumericVariableDefinition(DefExpr, Context, LineNumber, SM); - - if (!ParseResult) - return ParseResult.takeError(); - DefinedNumericVariable = *ParseResult; - } - - return std::move(ExpressionAST); -} - -bool FileCheckPattern::parsePattern(StringRef PatternStr, StringRef Prefix, - SourceMgr &SM, - const FileCheckRequest &Req) { - bool MatchFullLinesHere = Req.MatchFullLines && CheckTy != Check::CheckNot; - IgnoreCase = Req.IgnoreCase; - - PatternLoc = SMLoc::getFromPointer(PatternStr.data()); - - if (!(Req.NoCanonicalizeWhiteSpace && Req.MatchFullLines)) - // Ignore trailing whitespace. - while (!PatternStr.empty() && - (PatternStr.back() == ' ' || PatternStr.back() == '\t')) - PatternStr = PatternStr.substr(0, PatternStr.size() - 1); - - // Check that there is something on the line. - if (PatternStr.empty() && CheckTy != Check::CheckEmpty) { - SM.PrintMessage(PatternLoc, SourceMgr::DK_Error, - "found empty check string with prefix '" + Prefix + ":'"); - return true; - } - - if (!PatternStr.empty() && CheckTy == Check::CheckEmpty) { - SM.PrintMessage( - PatternLoc, SourceMgr::DK_Error, - "found non-empty check string for empty check with prefix '" + Prefix + - ":'"); - return true; - } - - if (CheckTy == Check::CheckEmpty) { - RegExStr = "(\n$)"; - return false; - } - - // Check to see if this is a fixed string, or if it has regex pieces. - if (!MatchFullLinesHere && - (PatternStr.size() < 2 || (PatternStr.find("{{") == StringRef::npos && - PatternStr.find("[[") == StringRef::npos))) { - FixedStr = PatternStr; - return false; - } - - if (MatchFullLinesHere) { - RegExStr += '^'; - if (!Req.NoCanonicalizeWhiteSpace) - RegExStr += " *"; - } - - // Paren value #0 is for the fully matched string. Any new parenthesized - // values add from there. - unsigned CurParen = 1; - - // Otherwise, there is at least one regex piece. 
Build up the regex pattern - // by escaping scary characters in fixed strings, building up one big regex. - while (!PatternStr.empty()) { - // RegEx matches. - if (PatternStr.startswith("{{")) { - // This is the start of a regex match. Scan for the }}. - size_t End = PatternStr.find("}}"); - if (End == StringRef::npos) { - SM.PrintMessage(SMLoc::getFromPointer(PatternStr.data()), - SourceMgr::DK_Error, - "found start of regex string with no end '}}'"); - return true; - } - - // Enclose {{}} patterns in parens just like [[]] even though we're not - // capturing the result for any purpose. This is required in case the - // expression contains an alternation like: CHECK: abc{{x|z}}def. We - // want this to turn into: "abc(x|z)def" not "abcx|zdef". - RegExStr += '('; - ++CurParen; - - if (AddRegExToRegEx(PatternStr.substr(2, End - 2), CurParen, SM)) - return true; - RegExStr += ')'; - - PatternStr = PatternStr.substr(End + 2); - continue; - } - - // String and numeric substitution blocks. Pattern substitution blocks come - // in two forms: [[foo:.*]] and [[foo]]. The former matches .* (or some - // other regex) and assigns it to the string variable 'foo'. The latter - // substitutes foo's value. Numeric substitution blocks recognize the same - // form as string ones, but start with a '#' sign after the double - // brackets. They also accept a combined form which sets a numeric variable - // to the evaluation of an expression. Both string and numeric variable - // names must satisfy the regular expression "[a-zA-Z_][0-9a-zA-Z_]*" to be - // valid, as this helps catch some common errors. - if (PatternStr.startswith("[[")) { - StringRef UnparsedPatternStr = PatternStr.substr(2); - // Find the closing bracket pair ending the match. End is going to be an - // offset relative to the beginning of the match string. 
- size_t End = FindRegexVarEnd(UnparsedPatternStr, SM); - StringRef MatchStr = UnparsedPatternStr.substr(0, End); - bool IsNumBlock = MatchStr.consume_front("#"); - - if (End == StringRef::npos) { - SM.PrintMessage(SMLoc::getFromPointer(PatternStr.data()), - SourceMgr::DK_Error, - "Invalid substitution block, no ]] found"); - return true; - } - // Strip the substitution block we are parsing. End points to the start - // of the "]]" closing the expression so account for it in computing the - // index of the first unparsed character. - PatternStr = UnparsedPatternStr.substr(End + 2); - - bool IsDefinition = false; - bool SubstNeeded = false; - // Whether the substitution block is a legacy use of @LINE with string - // substitution block syntax. - bool IsLegacyLineExpr = false; - StringRef DefName; - StringRef SubstStr; - StringRef MatchRegexp; - size_t SubstInsertIdx = RegExStr.size(); - - // Parse string variable or legacy @LINE expression. - if (!IsNumBlock) { - size_t VarEndIdx = MatchStr.find(":"); - size_t SpacePos = MatchStr.substr(0, VarEndIdx).find_first_of(" \t"); - if (SpacePos != StringRef::npos) { - SM.PrintMessage(SMLoc::getFromPointer(MatchStr.data() + SpacePos), - SourceMgr::DK_Error, "unexpected whitespace"); - return true; - } - - // Get the name (e.g. "foo") and verify it is well formed. 
- StringRef OrigMatchStr = MatchStr; - Expected ParseVarResult = - parseVariable(MatchStr, SM); - if (!ParseVarResult) { - logAllUnhandledErrors(ParseVarResult.takeError(), errs()); - return true; - } - StringRef Name = ParseVarResult->Name; - bool IsPseudo = ParseVarResult->IsPseudo; - - IsDefinition = (VarEndIdx != StringRef::npos); - SubstNeeded = !IsDefinition; - if (IsDefinition) { - if ((IsPseudo || !MatchStr.consume_front(":"))) { - SM.PrintMessage(SMLoc::getFromPointer(Name.data()), - SourceMgr::DK_Error, - "invalid name in string variable definition"); - return true; - } - - // Detect collisions between string and numeric variables when the - // former is created later than the latter. - if (Context->GlobalNumericVariableTable.find(Name) != - Context->GlobalNumericVariableTable.end()) { - SM.PrintMessage( - SMLoc::getFromPointer(Name.data()), SourceMgr::DK_Error, - "numeric variable with name '" + Name + "' already exists"); - return true; - } - DefName = Name; - MatchRegexp = MatchStr; - } else { - if (IsPseudo) { - MatchStr = OrigMatchStr; - IsLegacyLineExpr = IsNumBlock = true; - } else - SubstStr = Name; - } - } - - // Parse numeric substitution block. - std::unique_ptr ExpressionAST; - Optional DefinedNumericVariable; - if (IsNumBlock) { - Expected> ParseResult = - parseNumericSubstitutionBlock(MatchStr, DefinedNumericVariable, - IsLegacyLineExpr, LineNumber, Context, - SM); - if (!ParseResult) { - logAllUnhandledErrors(ParseResult.takeError(), errs()); - return true; - } - ExpressionAST = std::move(*ParseResult); - SubstNeeded = ExpressionAST != nullptr; - if (DefinedNumericVariable) { - IsDefinition = true; - DefName = (*DefinedNumericVariable)->getName(); - } - if (SubstNeeded) - SubstStr = MatchStr; - else - MatchRegexp = "[0-9]+"; - } - - // Handle variable definition: [[:(...)]] and [[#(...):(...)]]. 
- if (IsDefinition) { - RegExStr += '('; - ++SubstInsertIdx; - - if (IsNumBlock) { - FileCheckNumericVariableMatch NumericVariableDefinition = { - *DefinedNumericVariable, CurParen}; - NumericVariableDefs[DefName] = NumericVariableDefinition; - // This store is done here rather than in match() to allow - // parseNumericVariableUse() to get the pointer to the class instance - // of the right variable definition corresponding to a given numeric - // variable use. - Context->GlobalNumericVariableTable[DefName] = - *DefinedNumericVariable; - } else { - VariableDefs[DefName] = CurParen; - // Mark string variable as defined to detect collisions between - // string and numeric variables in parseNumericVariableUse() and - // defineCmdlineVariables() when the latter is created later than the - // former. We cannot reuse GlobalVariableTable for this by populating - // it with an empty string since we would then lose the ability to - // detect the use of an undefined variable in match(). - Context->DefinedVariableTable[DefName] = true; - } - - ++CurParen; - } - - if (!MatchRegexp.empty() && AddRegExToRegEx(MatchRegexp, CurParen, SM)) - return true; - - if (IsDefinition) - RegExStr += ')'; - - // Handle substitutions: [[foo]] and [[#]]. - if (SubstNeeded) { - // Handle substitution of string variables that were defined earlier on - // the same line by emitting a backreference. Expressions do not - // support substituting a numeric variable defined on the same line. - if (!IsNumBlock && VariableDefs.find(SubstStr) != VariableDefs.end()) { - unsigned CaptureParenGroup = VariableDefs[SubstStr]; - if (CaptureParenGroup < 1 || CaptureParenGroup > 9) { - SM.PrintMessage(SMLoc::getFromPointer(SubstStr.data()), - SourceMgr::DK_Error, - "Can't back-reference more than 9 variables"); - return true; - } - AddBackrefToRegEx(CaptureParenGroup); - } else { - // Handle substitution of string variables ([[]]) defined in - // previous CHECK patterns, and substitution of expressions. 
- FileCheckSubstitution *Substitution = - IsNumBlock - ? Context->makeNumericSubstitution( - SubstStr, std::move(ExpressionAST), SubstInsertIdx) - : Context->makeStringSubstitution(SubstStr, SubstInsertIdx); - Substitutions.push_back(Substitution); - } - } - } - - // Handle fixed string matches. - // Find the end, which is the start of the next regex. - size_t FixedMatchEnd = PatternStr.find("{{"); - FixedMatchEnd = std::min(FixedMatchEnd, PatternStr.find("[[")); - RegExStr += Regex::escape(PatternStr.substr(0, FixedMatchEnd)); - PatternStr = PatternStr.substr(FixedMatchEnd); - } - - if (MatchFullLinesHere) { - if (!Req.NoCanonicalizeWhiteSpace) - RegExStr += " *"; - RegExStr += '$'; - } - - return false; -} - -bool FileCheckPattern::AddRegExToRegEx(StringRef RS, unsigned &CurParen, SourceMgr &SM) { - Regex R(RS); - std::string Error; - if (!R.isValid(Error)) { - SM.PrintMessage(SMLoc::getFromPointer(RS.data()), SourceMgr::DK_Error, - "invalid regex: " + Error); - return true; - } - - RegExStr += RS.str(); - CurParen += R.getNumMatches(); - return false; -} - -void FileCheckPattern::AddBackrefToRegEx(unsigned BackrefNum) { - assert(BackrefNum >= 1 && BackrefNum <= 9 && "Invalid backref number"); - std::string Backref = std::string("\\") + std::string(1, '0' + BackrefNum); - RegExStr += Backref; -} - -Expected FileCheckPattern::match(StringRef Buffer, size_t &MatchLen, - const SourceMgr &SM) const { - // If this is the EOF pattern, match it immediately. - if (CheckTy == Check::CheckEOF) { - MatchLen = 0; - return Buffer.size(); - } - - // If this is a fixed string pattern, just match it now. - if (!FixedStr.empty()) { - MatchLen = FixedStr.size(); - size_t Pos = IgnoreCase ? Buffer.find_lower(FixedStr) - : Buffer.find(FixedStr); - if (Pos == StringRef::npos) - return make_error(); - return Pos; - } - - // Regex match. - - // If there are substitutions, we need to create a temporary string with the - // actual value. 
- StringRef RegExToMatch = RegExStr; - std::string TmpStr; - if (!Substitutions.empty()) { - TmpStr = RegExStr; - if (LineNumber) - Context->LineVariable->setValue(*LineNumber); - - size_t InsertOffset = 0; - // Substitute all string variables and expressions whose values are only - // now known. Use of string variables defined on the same line are handled - // by back-references. - for (const auto &Substitution : Substitutions) { - // Substitute and check for failure (e.g. use of undefined variable). - Expected Value = Substitution->getResult(); - if (!Value) - return Value.takeError(); - - // Plop it into the regex at the adjusted offset. - TmpStr.insert(TmpStr.begin() + Substitution->getIndex() + InsertOffset, - Value->begin(), Value->end()); - InsertOffset += Value->size(); - } - - // Match the newly constructed regex. - RegExToMatch = TmpStr; - } - - SmallVector MatchInfo; - unsigned int Flags = Regex::Newline; - if (IgnoreCase) - Flags |= Regex::IgnoreCase; - if (!Regex(RegExToMatch, Flags).match(Buffer, &MatchInfo)) - return make_error(); - - // Successful regex match. - assert(!MatchInfo.empty() && "Didn't get any match"); - StringRef FullMatch = MatchInfo[0]; - - // If this defines any string variables, remember their values. - for (const auto &VariableDef : VariableDefs) { - assert(VariableDef.second < MatchInfo.size() && "Internal paren error"); - Context->GlobalVariableTable[VariableDef.first] = - MatchInfo[VariableDef.second]; - } - - // If this defines any numeric variables, remember their values. 
- for (const auto &NumericVariableDef : NumericVariableDefs) { - const FileCheckNumericVariableMatch &NumericVariableMatch = - NumericVariableDef.getValue(); - unsigned CaptureParenGroup = NumericVariableMatch.CaptureParenGroup; - assert(CaptureParenGroup < MatchInfo.size() && "Internal paren error"); - FileCheckNumericVariable *DefinedNumericVariable = - NumericVariableMatch.DefinedNumericVariable; - - StringRef MatchedValue = MatchInfo[CaptureParenGroup]; - uint64_t Val; - if (MatchedValue.getAsInteger(10, Val)) - return FileCheckErrorDiagnostic::get(SM, MatchedValue, - "Unable to represent numeric value"); - DefinedNumericVariable->setValue(Val); - } - - // Like CHECK-NEXT, CHECK-EMPTY's match range is considered to start after - // the required preceding newline, which is consumed by the pattern in the - // case of CHECK-EMPTY but not CHECK-NEXT. - size_t MatchStartSkip = CheckTy == Check::CheckEmpty; - MatchLen = FullMatch.size() - MatchStartSkip; - return FullMatch.data() - Buffer.data() + MatchStartSkip; -} - -unsigned FileCheckPattern::computeMatchDistance(StringRef Buffer) const { - // Just compute the number of matching characters. For regular expressions, we - // just compare against the regex itself and hope for the best. - // - // FIXME: One easy improvement here is have the regex lib generate a single - // example regular expression which matches, and use that as the example - // string. - StringRef ExampleString(FixedStr); - if (ExampleString.empty()) - ExampleString = RegExStr; - - // Only compare up to the first line in the buffer, or the string size. - StringRef BufferPrefix = Buffer.substr(0, ExampleString.size()); - BufferPrefix = BufferPrefix.split('\n').first; - return BufferPrefix.edit_distance(ExampleString); -} - -void FileCheckPattern::printSubstitutions(const SourceMgr &SM, StringRef Buffer, - SMRange MatchRange) const { - // Print what we know about substitutions. 
- if (!Substitutions.empty()) { - for (const auto &Substitution : Substitutions) { - SmallString<256> Msg; - raw_svector_ostream OS(Msg); - Expected MatchedValue = Substitution->getResult(); - - // Substitution failed or is not known at match time, print the undefined - // variables it uses. - if (!MatchedValue) { - bool UndefSeen = false; - handleAllErrors(MatchedValue.takeError(), - [](const FileCheckNotFoundError &E) {}, - // Handled in PrintNoMatch(). - [](const FileCheckErrorDiagnostic &E) {}, - [&](const FileCheckUndefVarError &E) { - if (!UndefSeen) { - OS << "uses undefined variable(s):"; - UndefSeen = true; - } - OS << " "; - E.log(OS); - }); - } else { - // Substitution succeeded. Print substituted value. - OS << "with \""; - OS.write_escaped(Substitution->getFromString()) << "\" equal to \""; - OS.write_escaped(*MatchedValue) << "\""; - } - - if (MatchRange.isValid()) - SM.PrintMessage(MatchRange.Start, SourceMgr::DK_Note, OS.str(), - {MatchRange}); - else - SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), - SourceMgr::DK_Note, OS.str()); - } - } -} - -static SMRange ProcessMatchResult(FileCheckDiag::MatchType MatchTy, - const SourceMgr &SM, SMLoc Loc, - Check::FileCheckType CheckTy, - StringRef Buffer, size_t Pos, size_t Len, - std::vector *Diags, - bool AdjustPrevDiag = false) { - SMLoc Start = SMLoc::getFromPointer(Buffer.data() + Pos); - SMLoc End = SMLoc::getFromPointer(Buffer.data() + Pos + Len); - SMRange Range(Start, End); - if (Diags) { - if (AdjustPrevDiag) - Diags->rbegin()->MatchTy = MatchTy; - else - Diags->emplace_back(SM, CheckTy, Loc, MatchTy, Range); - } - return Range; -} - -void FileCheckPattern::printFuzzyMatch( - const SourceMgr &SM, StringRef Buffer, - std::vector *Diags) const { - // Attempt to find the closest/best fuzzy match. Usually an error happens - // because some string in the output didn't exactly match. 
In these cases, we - // would like to show the user a best guess at what "should have" matched, to - // save them having to actually check the input manually. - size_t NumLinesForward = 0; - size_t Best = StringRef::npos; - double BestQuality = 0; - - // Use an arbitrary 4k limit on how far we will search. - for (size_t i = 0, e = std::min(size_t(4096), Buffer.size()); i != e; ++i) { - if (Buffer[i] == '\n') - ++NumLinesForward; - - // Patterns have leading whitespace stripped, so skip whitespace when - // looking for something which looks like a pattern. - if (Buffer[i] == ' ' || Buffer[i] == '\t') - continue; - - // Compute the "quality" of this match as an arbitrary combination of the - // match distance and the number of lines skipped to get to this match. - unsigned Distance = computeMatchDistance(Buffer.substr(i)); - double Quality = Distance + (NumLinesForward / 100.); - - if (Quality < BestQuality || Best == StringRef::npos) { - Best = i; - BestQuality = Quality; - } - } - - // Print the "possible intended match here" line if we found something - // reasonable and not equal to what we showed in the "scanning from here" - // line. - if (Best && Best != StringRef::npos && BestQuality < 50) { - SMRange MatchRange = - ProcessMatchResult(FileCheckDiag::MatchFuzzy, SM, getLoc(), - getCheckTy(), Buffer, Best, 0, Diags); - SM.PrintMessage(MatchRange.Start, SourceMgr::DK_Note, - "possible intended match here"); - - // FIXME: If we wanted to be really friendly we would show why the match - // failed, as it can be hard to spot simple one character differences. - } -} - -Expected -FileCheckPatternContext::getPatternVarValue(StringRef VarName) { - auto VarIter = GlobalVariableTable.find(VarName); - if (VarIter == GlobalVariableTable.end()) - return make_error(VarName); - - return VarIter->second; -} - -template -FileCheckNumericVariable * -FileCheckPatternContext::makeNumericVariable(Types... 
args) { - NumericVariables.push_back( - std::make_unique(args...)); - return NumericVariables.back().get(); -} - -FileCheckSubstitution * -FileCheckPatternContext::makeStringSubstitution(StringRef VarName, - size_t InsertIdx) { - Substitutions.push_back( - std::make_unique(this, VarName, InsertIdx)); - return Substitutions.back().get(); -} - -FileCheckSubstitution *FileCheckPatternContext::makeNumericSubstitution( - StringRef ExpressionStr, - std::unique_ptr ExpressionAST, size_t InsertIdx) { - Substitutions.push_back(std::make_unique( - this, ExpressionStr, std::move(ExpressionAST), InsertIdx)); - return Substitutions.back().get(); -} - -size_t FileCheckPattern::FindRegexVarEnd(StringRef Str, SourceMgr &SM) { - // Offset keeps track of the current offset within the input Str - size_t Offset = 0; - // [...] Nesting depth - size_t BracketDepth = 0; - - while (!Str.empty()) { - if (Str.startswith("]]") && BracketDepth == 0) - return Offset; - if (Str[0] == '\\') { - // Backslash escapes the next char within regexes, so skip them both. - Str = Str.substr(2); - Offset += 2; - } else { - switch (Str[0]) { - default: - break; - case '[': - BracketDepth++; - break; - case ']': - if (BracketDepth == 0) { - SM.PrintMessage(SMLoc::getFromPointer(Str.data()), - SourceMgr::DK_Error, - "missing closing \"]\" for regex variable"); - exit(1); - } - BracketDepth--; - break; - } - Str = Str.substr(1); - Offset++; - } - } - - return StringRef::npos; -} - -StringRef FileCheck::CanonicalizeFile(MemoryBuffer &MB, - SmallVectorImpl &OutputBuffer) { - OutputBuffer.reserve(MB.getBufferSize()); - - for (const char *Ptr = MB.getBufferStart(), *End = MB.getBufferEnd(); - Ptr != End; ++Ptr) { - // Eliminate trailing dosish \r. - if (Ptr <= End - 2 && Ptr[0] == '\r' && Ptr[1] == '\n') { - continue; - } - - // If current char is not a horizontal whitespace or if horizontal - // whitespace canonicalization is disabled, dump it to output as is. 
- if (Req.NoCanonicalizeWhiteSpace || (*Ptr != ' ' && *Ptr != '\t')) { - OutputBuffer.push_back(*Ptr); - continue; - } - - // Otherwise, add one space and advance over neighboring space. - OutputBuffer.push_back(' '); - while (Ptr + 1 != End && (Ptr[1] == ' ' || Ptr[1] == '\t')) - ++Ptr; - } - - // Add a null byte and then return all but that byte. - OutputBuffer.push_back('\0'); - return StringRef(OutputBuffer.data(), OutputBuffer.size() - 1); -} - -FileCheckDiag::FileCheckDiag(const SourceMgr &SM, - const Check::FileCheckType &CheckTy, - SMLoc CheckLoc, MatchType MatchTy, - SMRange InputRange) - : CheckTy(CheckTy), MatchTy(MatchTy) { - auto Start = SM.getLineAndColumn(InputRange.Start); - auto End = SM.getLineAndColumn(InputRange.End); - InputStartLine = Start.first; - InputStartCol = Start.second; - InputEndLine = End.first; - InputEndCol = End.second; - Start = SM.getLineAndColumn(CheckLoc); - CheckLine = Start.first; - CheckCol = Start.second; -} - -static bool IsPartOfWord(char c) { - return (isalnum(c) || c == '-' || c == '_'); -} - -Check::FileCheckType &Check::FileCheckType::setCount(int C) { - assert(Count > 0 && "zero and negative counts are not supported"); - assert((C == 1 || Kind == CheckPlain) && - "count supported only for plain CHECK directives"); - Count = C; - return *this; -} - -std::string Check::FileCheckType::getDescription(StringRef Prefix) const { - switch (Kind) { - case Check::CheckNone: - return "invalid"; - case Check::CheckPlain: - if (Count > 1) - return Prefix.str() + "-COUNT"; - return Prefix; - case Check::CheckNext: - return Prefix.str() + "-NEXT"; - case Check::CheckSame: - return Prefix.str() + "-SAME"; - case Check::CheckNot: - return Prefix.str() + "-NOT"; - case Check::CheckDAG: - return Prefix.str() + "-DAG"; - case Check::CheckLabel: - return Prefix.str() + "-LABEL"; - case Check::CheckEmpty: - return Prefix.str() + "-EMPTY"; - case Check::CheckEOF: - return "implicit EOF"; - case Check::CheckBadNot: - return "bad NOT"; - 
case Check::CheckBadCount: - return "bad COUNT"; - } - llvm_unreachable("unknown FileCheckType"); -} - -static std::pair -FindCheckType(StringRef Buffer, StringRef Prefix) { - if (Buffer.size() <= Prefix.size()) - return {Check::CheckNone, StringRef()}; - - char NextChar = Buffer[Prefix.size()]; - - StringRef Rest = Buffer.drop_front(Prefix.size() + 1); - // Verify that the : is present after the prefix. - if (NextChar == ':') - return {Check::CheckPlain, Rest}; - - if (NextChar != '-') - return {Check::CheckNone, StringRef()}; - - if (Rest.consume_front("COUNT-")) { - int64_t Count; - if (Rest.consumeInteger(10, Count)) - // Error happened in parsing integer. - return {Check::CheckBadCount, Rest}; - if (Count <= 0 || Count > INT32_MAX) - return {Check::CheckBadCount, Rest}; - if (!Rest.consume_front(":")) - return {Check::CheckBadCount, Rest}; - return {Check::FileCheckType(Check::CheckPlain).setCount(Count), Rest}; - } - - if (Rest.consume_front("NEXT:")) - return {Check::CheckNext, Rest}; - - if (Rest.consume_front("SAME:")) - return {Check::CheckSame, Rest}; - - if (Rest.consume_front("NOT:")) - return {Check::CheckNot, Rest}; - - if (Rest.consume_front("DAG:")) - return {Check::CheckDAG, Rest}; - - if (Rest.consume_front("LABEL:")) - return {Check::CheckLabel, Rest}; - - if (Rest.consume_front("EMPTY:")) - return {Check::CheckEmpty, Rest}; - - // You can't combine -NOT with another suffix. - if (Rest.startswith("DAG-NOT:") || Rest.startswith("NOT-DAG:") || - Rest.startswith("NEXT-NOT:") || Rest.startswith("NOT-NEXT:") || - Rest.startswith("SAME-NOT:") || Rest.startswith("NOT-SAME:") || - Rest.startswith("EMPTY-NOT:") || Rest.startswith("NOT-EMPTY:")) - return {Check::CheckBadNot, Rest}; - - return {Check::CheckNone, Rest}; -} - -// From the given position, find the next character after the word. 
-static size_t SkipWord(StringRef Str, size_t Loc) { - while (Loc < Str.size() && IsPartOfWord(Str[Loc])) - ++Loc; - return Loc; -} - -/// Searches the buffer for the first prefix in the prefix regular expression. -/// -/// This searches the buffer using the provided regular expression, however it -/// enforces constraints beyond that: -/// 1) The found prefix must not be a suffix of something that looks like -/// a valid prefix. -/// 2) The found prefix must be followed by a valid check type suffix using \c -/// FindCheckType above. -/// -/// \returns a pair of StringRefs into the Buffer, which combines: -/// - the first match of the regular expression to satisfy these two is -/// returned, -/// otherwise an empty StringRef is returned to indicate failure. -/// - buffer rewound to the location right after parsed suffix, for parsing -/// to continue from -/// -/// If this routine returns a valid prefix, it will also shrink \p Buffer to -/// start at the beginning of the returned prefix, increment \p LineNumber for -/// each new line consumed from \p Buffer, and set \p CheckTy to the type of -/// check found by examining the suffix. -/// -/// If no valid prefix is found, the state of Buffer, LineNumber, and CheckTy -/// is unspecified. -static std::pair -FindFirstMatchingPrefix(Regex &PrefixRE, StringRef &Buffer, - unsigned &LineNumber, Check::FileCheckType &CheckTy) { - SmallVector Matches; - - while (!Buffer.empty()) { - // Find the first (longest) match using the RE. - if (!PrefixRE.match(Buffer, &Matches)) - // No match at all, bail. 
- return {StringRef(), StringRef()}; - - StringRef Prefix = Matches[0]; - Matches.clear(); - - assert(Prefix.data() >= Buffer.data() && - Prefix.data() < Buffer.data() + Buffer.size() && - "Prefix doesn't start inside of buffer!"); - size_t Loc = Prefix.data() - Buffer.data(); - StringRef Skipped = Buffer.substr(0, Loc); - Buffer = Buffer.drop_front(Loc); - LineNumber += Skipped.count('\n'); - - // Check that the matched prefix isn't a suffix of some other check-like - // word. - // FIXME: This is a very ad-hoc check. it would be better handled in some - // other way. Among other things it seems hard to distinguish between - // intentional and unintentional uses of this feature. - if (Skipped.empty() || !IsPartOfWord(Skipped.back())) { - // Now extract the type. - StringRef AfterSuffix; - std::tie(CheckTy, AfterSuffix) = FindCheckType(Buffer, Prefix); - - // If we've found a valid check type for this prefix, we're done. - if (CheckTy != Check::CheckNone) - return {Prefix, AfterSuffix}; - } - - // If we didn't successfully find a prefix, we need to skip this invalid - // prefix and continue scanning. We directly skip the prefix that was - // matched and any additional parts of that check-like word. - Buffer = Buffer.drop_front(SkipWord(Buffer, Prefix.size())); - } - - // We ran out of buffer while skipping partial matches so give up. 
- return {StringRef(), StringRef()}; -} - -void FileCheckPatternContext::createLineVariable() { - assert(!LineVariable && "@LINE pseudo numeric variable already created"); - StringRef LineName = "@LINE"; - LineVariable = makeNumericVariable(LineName); - GlobalNumericVariableTable[LineName] = LineVariable; -} - -FileCheck::FileCheck(FileCheckRequest Req) - : Req(Req), PatternContext(std::make_unique()), - CheckStrings(std::make_unique>()) {} - -FileCheck::~FileCheck() = default; - -bool FileCheck::readCheckFile(SourceMgr &SM, StringRef Buffer, - Regex &PrefixRE) { - Error DefineError = - PatternContext->defineCmdlineVariables(Req.GlobalDefines, SM); - if (DefineError) { - logAllUnhandledErrors(std::move(DefineError), errs()); - return true; - } - - PatternContext->createLineVariable(); - - std::vector ImplicitNegativeChecks; - for (const auto &PatternString : Req.ImplicitCheckNot) { - // Create a buffer with fake command line content in order to display the - // command line option responsible for the specific implicit CHECK-NOT. - std::string Prefix = "-implicit-check-not='"; - std::string Suffix = "'"; - std::unique_ptr CmdLine = MemoryBuffer::getMemBufferCopy( - Prefix + PatternString + Suffix, "command line"); - - StringRef PatternInBuffer = - CmdLine->getBuffer().substr(Prefix.size(), PatternString.size()); - SM.AddNewSourceBuffer(std::move(CmdLine), SMLoc()); - - ImplicitNegativeChecks.push_back( - FileCheckPattern(Check::CheckNot, PatternContext.get())); - ImplicitNegativeChecks.back().parsePattern(PatternInBuffer, - "IMPLICIT-CHECK", SM, Req); - } - - std::vector DagNotMatches = ImplicitNegativeChecks; - - // LineNumber keeps track of the line on which CheckPrefix instances are - // found. - unsigned LineNumber = 1; - - while (1) { - Check::FileCheckType CheckTy; - - // See if a prefix occurs in the memory buffer. 
- StringRef UsedPrefix; - StringRef AfterSuffix; - std::tie(UsedPrefix, AfterSuffix) = - FindFirstMatchingPrefix(PrefixRE, Buffer, LineNumber, CheckTy); - if (UsedPrefix.empty()) - break; - assert(UsedPrefix.data() == Buffer.data() && - "Failed to move Buffer's start forward, or pointed prefix outside " - "of the buffer!"); - assert(AfterSuffix.data() >= Buffer.data() && - AfterSuffix.data() < Buffer.data() + Buffer.size() && - "Parsing after suffix doesn't start inside of buffer!"); - - // Location to use for error messages. - const char *UsedPrefixStart = UsedPrefix.data(); - - // Skip the buffer to the end of parsed suffix (or just prefix, if no good - // suffix was processed). - Buffer = AfterSuffix.empty() ? Buffer.drop_front(UsedPrefix.size()) - : AfterSuffix; - - // Complain about useful-looking but unsupported suffixes. - if (CheckTy == Check::CheckBadNot) { - SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Error, - "unsupported -NOT combo on prefix '" + UsedPrefix + "'"); - return true; - } - - // Complain about invalid count specification. - if (CheckTy == Check::CheckBadCount) { - SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Error, - "invalid count in -COUNT specification on prefix '" + - UsedPrefix + "'"); - return true; - } - - // Okay, we found the prefix, yay. Remember the rest of the line, but ignore - // leading whitespace. - if (!(Req.NoCanonicalizeWhiteSpace && Req.MatchFullLines)) - Buffer = Buffer.substr(Buffer.find_first_not_of(" \t")); - - // Scan ahead to the end of line. - size_t EOL = Buffer.find_first_of("\n\r"); - - // Remember the location of the start of the pattern, for diagnostics. - SMLoc PatternLoc = SMLoc::getFromPointer(Buffer.data()); - - // Parse the pattern. 
- FileCheckPattern P(CheckTy, PatternContext.get(), LineNumber); - if (P.parsePattern(Buffer.substr(0, EOL), UsedPrefix, SM, Req)) - return true; - - // Verify that CHECK-LABEL lines do not define or use variables - if ((CheckTy == Check::CheckLabel) && P.hasVariable()) { - SM.PrintMessage( - SMLoc::getFromPointer(UsedPrefixStart), SourceMgr::DK_Error, - "found '" + UsedPrefix + "-LABEL:'" - " with variable definition or use"); - return true; - } - - Buffer = Buffer.substr(EOL); - - // Verify that CHECK-NEXT/SAME/EMPTY lines have at least one CHECK line before them. - if ((CheckTy == Check::CheckNext || CheckTy == Check::CheckSame || - CheckTy == Check::CheckEmpty) && - CheckStrings->empty()) { - StringRef Type = CheckTy == Check::CheckNext - ? "NEXT" - : CheckTy == Check::CheckEmpty ? "EMPTY" : "SAME"; - SM.PrintMessage(SMLoc::getFromPointer(UsedPrefixStart), - SourceMgr::DK_Error, - "found '" + UsedPrefix + "-" + Type + - "' without previous '" + UsedPrefix + ": line"); - return true; - } - - // Handle CHECK-DAG/-NOT. - if (CheckTy == Check::CheckDAG || CheckTy == Check::CheckNot) { - DagNotMatches.push_back(P); - continue; - } - - // Okay, add the string we captured to the output vector and move on. - CheckStrings->emplace_back(P, UsedPrefix, PatternLoc); - std::swap(DagNotMatches, CheckStrings->back().DagNotStrings); - DagNotMatches = ImplicitNegativeChecks; - } - - // Add an EOF pattern for any trailing CHECK-DAG/-NOTs, and use the first - // prefix as a filler for the error message. - if (!DagNotMatches.empty()) { - CheckStrings->emplace_back( - FileCheckPattern(Check::CheckEOF, PatternContext.get(), LineNumber + 1), - *Req.CheckPrefixes.begin(), SMLoc::getFromPointer(Buffer.data())); - std::swap(DagNotMatches, CheckStrings->back().DagNotStrings); - } - - if (CheckStrings->empty()) { - errs() << "error: no check strings found with prefix" - << (Req.CheckPrefixes.size() > 1 ? 
"es " : " "); - auto I = Req.CheckPrefixes.begin(); - auto E = Req.CheckPrefixes.end(); - if (I != E) { - errs() << "\'" << *I << ":'"; - ++I; - } - for (; I != E; ++I) - errs() << ", \'" << *I << ":'"; - - errs() << '\n'; - return true; - } - - return false; -} - -static void PrintMatch(bool ExpectedMatch, const SourceMgr &SM, - StringRef Prefix, SMLoc Loc, const FileCheckPattern &Pat, - int MatchedCount, StringRef Buffer, size_t MatchPos, - size_t MatchLen, const FileCheckRequest &Req, - std::vector *Diags) { - bool PrintDiag = true; - if (ExpectedMatch) { - if (!Req.Verbose) - return; - if (!Req.VerboseVerbose && Pat.getCheckTy() == Check::CheckEOF) - return; - // Due to their verbosity, we don't print verbose diagnostics here if we're - // gathering them for a different rendering, but we always print other - // diagnostics. - PrintDiag = !Diags; - } - SMRange MatchRange = ProcessMatchResult( - ExpectedMatch ? FileCheckDiag::MatchFoundAndExpected - : FileCheckDiag::MatchFoundButExcluded, - SM, Loc, Pat.getCheckTy(), Buffer, MatchPos, MatchLen, Diags); - if (!PrintDiag) - return; - - std::string Message = formatv("{0}: {1} string found in input", - Pat.getCheckTy().getDescription(Prefix), - (ExpectedMatch ? "expected" : "excluded")) - .str(); - if (Pat.getCount() > 1) - Message += formatv(" ({0} out of {1})", MatchedCount, Pat.getCount()).str(); - - SM.PrintMessage( - Loc, ExpectedMatch ? 
SourceMgr::DK_Remark : SourceMgr::DK_Error, Message); - SM.PrintMessage(MatchRange.Start, SourceMgr::DK_Note, "found here", - {MatchRange}); - Pat.printSubstitutions(SM, Buffer, MatchRange); -} - -static void PrintMatch(bool ExpectedMatch, const SourceMgr &SM, - const FileCheckString &CheckStr, int MatchedCount, - StringRef Buffer, size_t MatchPos, size_t MatchLen, - FileCheckRequest &Req, - std::vector *Diags) { - PrintMatch(ExpectedMatch, SM, CheckStr.Prefix, CheckStr.Loc, CheckStr.Pat, - MatchedCount, Buffer, MatchPos, MatchLen, Req, Diags); -} - -static void PrintNoMatch(bool ExpectedMatch, const SourceMgr &SM, - StringRef Prefix, SMLoc Loc, - const FileCheckPattern &Pat, int MatchedCount, - StringRef Buffer, bool VerboseVerbose, - std::vector *Diags, Error MatchErrors) { - assert(MatchErrors && "Called on successful match"); - bool PrintDiag = true; - if (!ExpectedMatch) { - if (!VerboseVerbose) { - consumeError(std::move(MatchErrors)); - return; - } - // Due to their verbosity, we don't print verbose diagnostics here if we're - // gathering them for a different rendering, but we always print other - // diagnostics. - PrintDiag = !Diags; - } - - // If the current position is at the end of a line, advance to the start of - // the next line. - Buffer = Buffer.substr(Buffer.find_first_not_of(" \t\n\r")); - SMRange SearchRange = ProcessMatchResult( - ExpectedMatch ? FileCheckDiag::MatchNoneButExpected - : FileCheckDiag::MatchNoneAndExcluded, - SM, Loc, Pat.getCheckTy(), Buffer, 0, Buffer.size(), Diags); - if (!PrintDiag) { - consumeError(std::move(MatchErrors)); - return; - } - - MatchErrors = - handleErrors(std::move(MatchErrors), - [](const FileCheckErrorDiagnostic &E) { E.log(errs()); }); - - // No problem matching the string per se. - if (!MatchErrors) - return; - consumeError(std::move(MatchErrors)); - - // Print "not found" diagnostic. 
- std::string Message = formatv("{0}: {1} string not found in input", - Pat.getCheckTy().getDescription(Prefix), - (ExpectedMatch ? "expected" : "excluded")) - .str(); - if (Pat.getCount() > 1) - Message += formatv(" ({0} out of {1})", MatchedCount, Pat.getCount()).str(); - SM.PrintMessage( - Loc, ExpectedMatch ? SourceMgr::DK_Error : SourceMgr::DK_Remark, Message); - - // Print the "scanning from here" line. - SM.PrintMessage(SearchRange.Start, SourceMgr::DK_Note, "scanning from here"); - - // Allow the pattern to print additional information if desired. - Pat.printSubstitutions(SM, Buffer); - - if (ExpectedMatch) - Pat.printFuzzyMatch(SM, Buffer, Diags); -} - -static void PrintNoMatch(bool ExpectedMatch, const SourceMgr &SM, - const FileCheckString &CheckStr, int MatchedCount, - StringRef Buffer, bool VerboseVerbose, - std::vector *Diags, Error MatchErrors) { - PrintNoMatch(ExpectedMatch, SM, CheckStr.Prefix, CheckStr.Loc, CheckStr.Pat, - MatchedCount, Buffer, VerboseVerbose, Diags, - std::move(MatchErrors)); -} - -/// Counts the number of newlines in the specified range. -static unsigned CountNumNewlinesBetween(StringRef Range, - const char *&FirstNewLine) { - unsigned NumNewLines = 0; - while (1) { - // Scan for newline. - Range = Range.substr(Range.find_first_of("\n\r")); - if (Range.empty()) - return NumNewLines; - - ++NumNewLines; - - // Handle \n\r and \r\n as a single newline. 
- if (Range.size() > 1 && (Range[1] == '\n' || Range[1] == '\r') && - (Range[0] != Range[1])) - Range = Range.substr(1); - Range = Range.substr(1); - - if (NumNewLines == 1) - FirstNewLine = Range.begin(); - } -} - -size_t FileCheckString::Check(const SourceMgr &SM, StringRef Buffer, - bool IsLabelScanMode, size_t &MatchLen, - FileCheckRequest &Req, - std::vector *Diags) const { - size_t LastPos = 0; - std::vector NotStrings; - - // IsLabelScanMode is true when we are scanning forward to find CHECK-LABEL - // bounds; we have not processed variable definitions within the bounded block - // yet so cannot handle any final CHECK-DAG yet; this is handled when going - // over the block again (including the last CHECK-LABEL) in normal mode. - if (!IsLabelScanMode) { - // Match "dag strings" (with mixed "not strings" if any). - LastPos = CheckDag(SM, Buffer, NotStrings, Req, Diags); - if (LastPos == StringRef::npos) - return StringRef::npos; - } - - // Match itself from the last position after matching CHECK-DAG. - size_t LastMatchEnd = LastPos; - size_t FirstMatchPos = 0; - // Go match the pattern Count times. Majority of patterns only match with - // count 1 though. - assert(Pat.getCount() != 0 && "pattern count can not be zero"); - for (int i = 1; i <= Pat.getCount(); i++) { - StringRef MatchBuffer = Buffer.substr(LastMatchEnd); - size_t CurrentMatchLen; - // get a match at current start point - Expected MatchResult = Pat.match(MatchBuffer, CurrentMatchLen, SM); - - // report - if (!MatchResult) { - PrintNoMatch(true, SM, *this, i, MatchBuffer, Req.VerboseVerbose, Diags, - MatchResult.takeError()); - return StringRef::npos; - } - size_t MatchPos = *MatchResult; - PrintMatch(true, SM, *this, i, MatchBuffer, MatchPos, CurrentMatchLen, Req, - Diags); - if (i == 1) - FirstMatchPos = LastPos + MatchPos; - - // move start point after the match - LastMatchEnd += MatchPos + CurrentMatchLen; - } - // Full match len counts from first match pos. 
- MatchLen = LastMatchEnd - FirstMatchPos; - - // Similar to the above, in "label-scan mode" we can't yet handle CHECK-NEXT - // or CHECK-NOT - if (!IsLabelScanMode) { - size_t MatchPos = FirstMatchPos - LastPos; - StringRef MatchBuffer = Buffer.substr(LastPos); - StringRef SkippedRegion = Buffer.substr(LastPos, MatchPos); - - // If this check is a "CHECK-NEXT", verify that the previous match was on - // the previous line (i.e. that there is one newline between them). - if (CheckNext(SM, SkippedRegion)) { - ProcessMatchResult(FileCheckDiag::MatchFoundButWrongLine, SM, Loc, - Pat.getCheckTy(), MatchBuffer, MatchPos, MatchLen, - Diags, Req.Verbose); - return StringRef::npos; - } - - // If this check is a "CHECK-SAME", verify that the previous match was on - // the same line (i.e. that there is no newline between them). - if (CheckSame(SM, SkippedRegion)) { - ProcessMatchResult(FileCheckDiag::MatchFoundButWrongLine, SM, Loc, - Pat.getCheckTy(), MatchBuffer, MatchPos, MatchLen, - Diags, Req.Verbose); - return StringRef::npos; - } - - // If this match had "not strings", verify that they don't exist in the - // skipped region. - if (CheckNot(SM, SkippedRegion, NotStrings, Req, Diags)) - return StringRef::npos; - } - - return FirstMatchPos; -} - -bool FileCheckString::CheckNext(const SourceMgr &SM, StringRef Buffer) const { - if (Pat.getCheckTy() != Check::CheckNext && - Pat.getCheckTy() != Check::CheckEmpty) - return false; - - Twine CheckName = - Prefix + - Twine(Pat.getCheckTy() == Check::CheckEmpty ? "-EMPTY" : "-NEXT"); - - // Count the number of newlines between the previous match and this one. 
- const char *FirstNewLine = nullptr; - unsigned NumNewLines = CountNumNewlinesBetween(Buffer, FirstNewLine); - - if (NumNewLines == 0) { - SM.PrintMessage(Loc, SourceMgr::DK_Error, - CheckName + ": is on the same line as previous match"); - SM.PrintMessage(SMLoc::getFromPointer(Buffer.end()), SourceMgr::DK_Note, - "'next' match was here"); - SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note, - "previous match ended here"); - return true; - } - - if (NumNewLines != 1) { - SM.PrintMessage(Loc, SourceMgr::DK_Error, - CheckName + - ": is not on the line after the previous match"); - SM.PrintMessage(SMLoc::getFromPointer(Buffer.end()), SourceMgr::DK_Note, - "'next' match was here"); - SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note, - "previous match ended here"); - SM.PrintMessage(SMLoc::getFromPointer(FirstNewLine), SourceMgr::DK_Note, - "non-matching line after previous match is here"); - return true; - } - - return false; -} - -bool FileCheckString::CheckSame(const SourceMgr &SM, StringRef Buffer) const { - if (Pat.getCheckTy() != Check::CheckSame) - return false; - - // Count the number of newlines between the previous match and this one. 
- const char *FirstNewLine = nullptr; - unsigned NumNewLines = CountNumNewlinesBetween(Buffer, FirstNewLine); - - if (NumNewLines != 0) { - SM.PrintMessage(Loc, SourceMgr::DK_Error, - Prefix + - "-SAME: is not on the same line as the previous match"); - SM.PrintMessage(SMLoc::getFromPointer(Buffer.end()), SourceMgr::DK_Note, - "'next' match was here"); - SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note, - "previous match ended here"); - return true; - } - - return false; -} - -bool FileCheckString::CheckNot( - const SourceMgr &SM, StringRef Buffer, - const std::vector &NotStrings, - const FileCheckRequest &Req, std::vector *Diags) const { - for (const FileCheckPattern *Pat : NotStrings) { - assert((Pat->getCheckTy() == Check::CheckNot) && "Expect CHECK-NOT!"); - - size_t MatchLen = 0; - Expected MatchResult = Pat->match(Buffer, MatchLen, SM); - - if (!MatchResult) { - PrintNoMatch(false, SM, Prefix, Pat->getLoc(), *Pat, 1, Buffer, - Req.VerboseVerbose, Diags, MatchResult.takeError()); - continue; - } - size_t Pos = *MatchResult; - - PrintMatch(false, SM, Prefix, Pat->getLoc(), *Pat, 1, Buffer, Pos, MatchLen, - Req, Diags); - - return true; - } - - return false; -} - -size_t -FileCheckString::CheckDag(const SourceMgr &SM, StringRef Buffer, - std::vector &NotStrings, - const FileCheckRequest &Req, - std::vector *Diags) const { - if (DagNotStrings.empty()) - return 0; - - // The start of the search range. - size_t StartPos = 0; - - struct MatchRange { - size_t Pos; - size_t End; - }; - // A sorted list of ranges for non-overlapping CHECK-DAG matches. Match - // ranges are erased from this list once they are no longer in the search - // range. - std::list MatchRanges; - - // We need PatItr and PatEnd later for detecting the end of a CHECK-DAG - // group, so we don't use a range-based for loop here. 
- for (auto PatItr = DagNotStrings.begin(), PatEnd = DagNotStrings.end(); - PatItr != PatEnd; ++PatItr) { - const FileCheckPattern &Pat = *PatItr; - assert((Pat.getCheckTy() == Check::CheckDAG || - Pat.getCheckTy() == Check::CheckNot) && - "Invalid CHECK-DAG or CHECK-NOT!"); - - if (Pat.getCheckTy() == Check::CheckNot) { - NotStrings.push_back(&Pat); - continue; - } - - assert((Pat.getCheckTy() == Check::CheckDAG) && "Expect CHECK-DAG!"); - - // CHECK-DAG always matches from the start. - size_t MatchLen = 0, MatchPos = StartPos; - - // Search for a match that doesn't overlap a previous match in this - // CHECK-DAG group. - for (auto MI = MatchRanges.begin(), ME = MatchRanges.end(); true; ++MI) { - StringRef MatchBuffer = Buffer.substr(MatchPos); - Expected MatchResult = Pat.match(MatchBuffer, MatchLen, SM); - // With a group of CHECK-DAGs, a single mismatching means the match on - // that group of CHECK-DAGs fails immediately. - if (!MatchResult) { - PrintNoMatch(true, SM, Prefix, Pat.getLoc(), Pat, 1, MatchBuffer, - Req.VerboseVerbose, Diags, MatchResult.takeError()); - return StringRef::npos; - } - size_t MatchPosBuf = *MatchResult; - // Re-calc it as the offset relative to the start of the original string. - MatchPos += MatchPosBuf; - if (Req.VerboseVerbose) - PrintMatch(true, SM, Prefix, Pat.getLoc(), Pat, 1, Buffer, MatchPos, - MatchLen, Req, Diags); - MatchRange M{MatchPos, MatchPos + MatchLen}; - if (Req.AllowDeprecatedDagOverlap) { - // We don't need to track all matches in this mode, so we just maintain - // one match range that encompasses the current CHECK-DAG group's - // matches. - if (MatchRanges.empty()) - MatchRanges.insert(MatchRanges.end(), M); - else { - auto Block = MatchRanges.begin(); - Block->Pos = std::min(Block->Pos, M.Pos); - Block->End = std::max(Block->End, M.End); - } - break; - } - // Iterate previous matches until overlapping match or insertion point. 
- bool Overlap = false; - for (; MI != ME; ++MI) { - if (M.Pos < MI->End) { - // !Overlap => New match has no overlap and is before this old match. - // Overlap => New match overlaps this old match. - Overlap = MI->Pos < M.End; - break; - } - } - if (!Overlap) { - // Insert non-overlapping match into list. - MatchRanges.insert(MI, M); - break; - } - if (Req.VerboseVerbose) { - // Due to their verbosity, we don't print verbose diagnostics here if - // we're gathering them for a different rendering, but we always print - // other diagnostics. - if (!Diags) { - SMLoc OldStart = SMLoc::getFromPointer(Buffer.data() + MI->Pos); - SMLoc OldEnd = SMLoc::getFromPointer(Buffer.data() + MI->End); - SMRange OldRange(OldStart, OldEnd); - SM.PrintMessage(OldStart, SourceMgr::DK_Note, - "match discarded, overlaps earlier DAG match here", - {OldRange}); - } else - Diags->rbegin()->MatchTy = FileCheckDiag::MatchFoundButDiscarded; - } - MatchPos = MI->End; - } - if (!Req.VerboseVerbose) - PrintMatch(true, SM, Prefix, Pat.getLoc(), Pat, 1, Buffer, MatchPos, - MatchLen, Req, Diags); - - // Handle the end of a CHECK-DAG group. - if (std::next(PatItr) == PatEnd || - std::next(PatItr)->getCheckTy() == Check::CheckNot) { - if (!NotStrings.empty()) { - // If there are CHECK-NOTs between two CHECK-DAGs or from CHECK to - // CHECK-DAG, verify that there are no 'not' strings occurred in that - // region. - StringRef SkippedRegion = - Buffer.slice(StartPos, MatchRanges.begin()->Pos); - if (CheckNot(SM, SkippedRegion, NotStrings, Req, Diags)) - return StringRef::npos; - // Clear "not strings". - NotStrings.clear(); - } - // All subsequent CHECK-DAGs and CHECK-NOTs should be matched from the - // end of this CHECK-DAG group's match range. - StartPos = MatchRanges.rbegin()->End; - // Don't waste time checking for (impossible) overlaps before that. - MatchRanges.clear(); - } - } - - return StartPos; -} - -// A check prefix must contain only alphanumeric, hyphens and underscores. 
-static bool ValidateCheckPrefix(StringRef CheckPrefix) { - static const Regex Validator("^[a-zA-Z0-9_-]*$"); - return Validator.match(CheckPrefix); -} - -bool FileCheck::ValidateCheckPrefixes() { - StringSet<> PrefixSet; - - for (StringRef Prefix : Req.CheckPrefixes) { - // Reject empty prefixes. - if (Prefix == "") - return false; - - if (!PrefixSet.insert(Prefix).second) - return false; - - if (!ValidateCheckPrefix(Prefix)) - return false; - } - - return true; -} - -Regex FileCheck::buildCheckPrefixRegex() { - // I don't think there's a way to specify an initial value for cl::list, - // so if nothing was specified, add the default - if (Req.CheckPrefixes.empty()) - Req.CheckPrefixes.push_back("CHECK"); - - // We already validated the contents of CheckPrefixes so just concatenate - // them as alternatives. - SmallString<32> PrefixRegexStr; - for (StringRef Prefix : Req.CheckPrefixes) { - if (Prefix != Req.CheckPrefixes.front()) - PrefixRegexStr.push_back('|'); - - PrefixRegexStr.append(Prefix); - } - - return Regex(PrefixRegexStr); -} - -Error FileCheckPatternContext::defineCmdlineVariables( - std::vector &CmdlineDefines, SourceMgr &SM) { - assert(GlobalVariableTable.empty() && GlobalNumericVariableTable.empty() && - "Overriding defined variable with command-line variable definitions"); - - if (CmdlineDefines.empty()) - return Error::success(); - - // Create a string representing the vector of command-line definitions. Each - // definition is on its own line and prefixed with a definition number to - // clarify which definition a given diagnostic corresponds to. 
- unsigned I = 0; - Error Errs = Error::success(); - std::string CmdlineDefsDiag; - SmallVector, 4> CmdlineDefsIndices; - for (StringRef CmdlineDef : CmdlineDefines) { - std::string DefPrefix = ("Global define #" + Twine(++I) + ": ").str(); - size_t EqIdx = CmdlineDef.find('='); - if (EqIdx == StringRef::npos) { - CmdlineDefsIndices.push_back(std::make_pair(CmdlineDefsDiag.size(), 0)); - continue; - } - // Numeric variable definition. - if (CmdlineDef[0] == '#') { - // Append a copy of the command-line definition adapted to use the same - // format as in the input file to be able to reuse - // parseNumericSubstitutionBlock. - CmdlineDefsDiag += (DefPrefix + CmdlineDef + " (parsed as: [[").str(); - std::string SubstitutionStr = CmdlineDef; - SubstitutionStr[EqIdx] = ':'; - CmdlineDefsIndices.push_back( - std::make_pair(CmdlineDefsDiag.size(), SubstitutionStr.size())); - CmdlineDefsDiag += (SubstitutionStr + Twine("]])\n")).str(); - } else { - CmdlineDefsDiag += DefPrefix; - CmdlineDefsIndices.push_back( - std::make_pair(CmdlineDefsDiag.size(), CmdlineDef.size())); - CmdlineDefsDiag += (CmdlineDef + "\n").str(); - } - } - - // Create a buffer with fake command line content in order to display - // parsing diagnostic with location information and point to the - // global definition with invalid syntax. - std::unique_ptr CmdLineDefsDiagBuffer = - MemoryBuffer::getMemBufferCopy(CmdlineDefsDiag, "Global defines"); - StringRef CmdlineDefsDiagRef = CmdLineDefsDiagBuffer->getBuffer(); - SM.AddNewSourceBuffer(std::move(CmdLineDefsDiagBuffer), SMLoc()); - - for (std::pair CmdlineDefIndices : CmdlineDefsIndices) { - StringRef CmdlineDef = CmdlineDefsDiagRef.substr(CmdlineDefIndices.first, - CmdlineDefIndices.second); - if (CmdlineDef.empty()) { - Errs = joinErrors( - std::move(Errs), - FileCheckErrorDiagnostic::get( - SM, CmdlineDef, "missing equal sign in global definition")); - continue; - } - - // Numeric variable definition. 
- if (CmdlineDef[0] == '#') { - // Now parse the definition both to check that the syntax is correct and - // to create the necessary class instance. - StringRef CmdlineDefExpr = CmdlineDef.substr(1); - Optional DefinedNumericVariable; - Expected> ExpressionASTResult = - FileCheckPattern::parseNumericSubstitutionBlock( - CmdlineDefExpr, DefinedNumericVariable, false, None, this, SM); - if (!ExpressionASTResult) { - Errs = joinErrors(std::move(Errs), ExpressionASTResult.takeError()); - continue; - } - std::unique_ptr ExpressionAST = - std::move(*ExpressionASTResult); - // Now evaluate the expression whose value this variable should be set - // to, since the expression of a command-line variable definition should - // only use variables defined earlier on the command-line. If not, this - // is an error and we report it. - Expected Value = ExpressionAST->eval(); - if (!Value) { - Errs = joinErrors(std::move(Errs), Value.takeError()); - continue; - } - - assert(DefinedNumericVariable && "No variable defined"); - (*DefinedNumericVariable)->setValue(*Value); - - // Record this variable definition. - GlobalNumericVariableTable[(*DefinedNumericVariable)->getName()] = - *DefinedNumericVariable; - } else { - // String variable definition. - std::pair CmdlineNameVal = CmdlineDef.split('='); - StringRef CmdlineName = CmdlineNameVal.first; - StringRef OrigCmdlineName = CmdlineName; - Expected ParseVarResult = - FileCheckPattern::parseVariable(CmdlineName, SM); - if (!ParseVarResult) { - Errs = joinErrors(std::move(Errs), ParseVarResult.takeError()); - continue; - } - // Check that CmdlineName does not denote a pseudo variable is only - // composed of the parsed numeric variable. This catches cases like - // "FOO+2" in a "FOO+2=10" definition. 
- if (ParseVarResult->IsPseudo || !CmdlineName.empty()) { - Errs = joinErrors(std::move(Errs), - FileCheckErrorDiagnostic::get( - SM, OrigCmdlineName, - "invalid name in string variable definition '" + - OrigCmdlineName + "'")); - continue; - } - StringRef Name = ParseVarResult->Name; - - // Detect collisions between string and numeric variables when the former - // is created later than the latter. - if (GlobalNumericVariableTable.find(Name) != - GlobalNumericVariableTable.end()) { - Errs = joinErrors(std::move(Errs), FileCheckErrorDiagnostic::get( - SM, Name, - "numeric variable with name '" + - Name + "' already exists")); - continue; - } - GlobalVariableTable.insert(CmdlineNameVal); - // Mark the string variable as defined to detect collisions between - // string and numeric variables in defineCmdlineVariables when the latter - // is created later than the former. We cannot reuse GlobalVariableTable - // for this by populating it with an empty string since we would then - // lose the ability to detect the use of an undefined variable in - // match(). - DefinedVariableTable[Name] = true; - } - } - - return Errs; -} - -void FileCheckPatternContext::clearLocalVars() { - SmallVector LocalPatternVars, LocalNumericVars; - for (const StringMapEntry &Var : GlobalVariableTable) - if (Var.first()[0] != '$') - LocalPatternVars.push_back(Var.first()); - - // Numeric substitution reads the value of a variable directly, not via - // GlobalNumericVariableTable. Therefore, we clear local variables by - // clearing their value which will lead to a numeric substitution failure. We - // also mark the variable for removal from GlobalNumericVariableTable since - // this is what defineCmdlineVariables checks to decide that no global - // variable has been defined. 
- for (const auto &Var : GlobalNumericVariableTable) - if (Var.first()[0] != '$') { - Var.getValue()->clearValue(); - LocalNumericVars.push_back(Var.first()); - } - - for (const auto &Var : LocalPatternVars) - GlobalVariableTable.erase(Var); - for (const auto &Var : LocalNumericVars) - GlobalNumericVariableTable.erase(Var); -} - -bool FileCheck::checkInput(SourceMgr &SM, StringRef Buffer, - std::vector *Diags) { - bool ChecksFailed = false; - - unsigned i = 0, j = 0, e = CheckStrings->size(); - while (true) { - StringRef CheckRegion; - if (j == e) { - CheckRegion = Buffer; - } else { - const FileCheckString &CheckLabelStr = (*CheckStrings)[j]; - if (CheckLabelStr.Pat.getCheckTy() != Check::CheckLabel) { - ++j; - continue; - } - - // Scan to next CHECK-LABEL match, ignoring CHECK-NOT and CHECK-DAG - size_t MatchLabelLen = 0; - size_t MatchLabelPos = - CheckLabelStr.Check(SM, Buffer, true, MatchLabelLen, Req, Diags); - if (MatchLabelPos == StringRef::npos) - // Immediately bail if CHECK-LABEL fails, nothing else we can do. - return false; - - CheckRegion = Buffer.substr(0, MatchLabelPos + MatchLabelLen); - Buffer = Buffer.substr(MatchLabelPos + MatchLabelLen); - ++j; - } - - // Do not clear the first region as it's the one before the first - // CHECK-LABEL and it would clear variables defined on the command-line - // before they get used. - if (i != 0 && Req.EnableVarScope) - PatternContext->clearLocalVars(); - - for (; i != j; ++i) { - const FileCheckString &CheckStr = (*CheckStrings)[i]; - - // Check each string within the scanned region, including a second check - // of any final CHECK-LABEL (to verify CHECK-NOT and CHECK-DAG) - size_t MatchLen = 0; - size_t MatchPos = - CheckStr.Check(SM, CheckRegion, false, MatchLen, Req, Diags); - - if (MatchPos == StringRef::npos) { - ChecksFailed = true; - i = j; - break; - } - - CheckRegion = CheckRegion.substr(MatchPos + MatchLen); - } - - if (j == e) - break; - } - - // Success if no checks failed. 
- return !ChecksFailed; -} +//===- FileCheck.cpp - Check that File's Contents match what is expected --===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// FileCheck does a line-by line check of a file that validates whether it +// contains the expected content. This is useful for regression tests etc. +// +// This file implements most of the API that will be used by the FileCheck utility +// as well as various unittests. +//===----------------------------------------------------------------------===// + +#include "llvm/Support/FileCheck.h" +#include "FileCheckImpl.h" +#include "llvm/ADT/StringSet.h" +#include "llvm/ADT/Twine.h" +#include "llvm/Support/FormatVariadic.h" +#include +#include +#include +#include + +using namespace llvm; + +Expected FileCheckNumericVariableUse::eval() const { + Optional Value = NumericVariable->getValue(); + if (Value) + return *Value; + + return make_error(Name); +} + +Expected FileCheckASTBinop::eval() const { + Expected LeftOp = LeftOperand->eval(); + Expected RightOp = RightOperand->eval(); + + // Bubble up any error (e.g. undefined variables) in the recursive + // evaluation. 
+ if (!LeftOp || !RightOp) { + Error Err = Error::success(); + if (!LeftOp) + Err = joinErrors(std::move(Err), LeftOp.takeError()); + if (!RightOp) + Err = joinErrors(std::move(Err), RightOp.takeError()); + return std::move(Err); + } + + return EvalBinop(*LeftOp, *RightOp); +} + +Expected FileCheckNumericSubstitution::getResult() const { + Expected EvaluatedValue = ExpressionAST->eval(); + if (!EvaluatedValue) + return EvaluatedValue.takeError(); + return utostr(*EvaluatedValue); +} + +Expected FileCheckStringSubstitution::getResult() const { + // Look up the value and escape it so that we can put it into the regex. + Expected VarVal = Context->getPatternVarValue(FromStr); + if (!VarVal) + return VarVal.takeError(); + return Regex::escape(*VarVal); +} + +bool FileCheckPattern::isValidVarNameStart(char C) { + return C == '_' || isalpha(C); +} + +Expected +FileCheckPattern::parseVariable(StringRef &Str, const SourceMgr &SM) { + if (Str.empty()) + return FileCheckErrorDiagnostic::get(SM, Str, "empty variable name"); + + bool ParsedOneChar = false; + unsigned I = 0; + bool IsPseudo = Str[0] == '@'; + + // Global vars start with '$'. + if (Str[0] == '$' || IsPseudo) + ++I; + + for (unsigned E = Str.size(); I != E; ++I) { + if (!ParsedOneChar && !isValidVarNameStart(Str[I])) + return FileCheckErrorDiagnostic::get(SM, Str, "invalid variable name"); + + // Variable names are composed of alphanumeric characters and underscores. + if (Str[I] != '_' && !isalnum(Str[I])) + break; + ParsedOneChar = true; + } + + StringRef Name = Str.take_front(I); + Str = Str.substr(I); + return VariableProperties {Name, IsPseudo}; +} + +// StringRef holding all characters considered as horizontal whitespaces by +// FileCheck input canonicalization. +constexpr StringLiteral SpaceChars = " \t"; + +// Parsing helper function that strips the first character in S and returns it. 
+static char popFront(StringRef &S) { + char C = S.front(); + S = S.drop_front(); + return C; +} + +char FileCheckUndefVarError::ID = 0; +char FileCheckErrorDiagnostic::ID = 0; +char FileCheckNotFoundError::ID = 0; + +Expected +FileCheckPattern::parseNumericVariableDefinition( + StringRef &Expr, FileCheckPatternContext *Context, + Optional LineNumber, const SourceMgr &SM) { + Expected ParseVarResult = parseVariable(Expr, SM); + if (!ParseVarResult) + return ParseVarResult.takeError(); + StringRef Name = ParseVarResult->Name; + + if (ParseVarResult->IsPseudo) + return FileCheckErrorDiagnostic::get( + SM, Name, "definition of pseudo numeric variable unsupported"); + + // Detect collisions between string and numeric variables when the latter + // is created later than the former. + if (Context->DefinedVariableTable.find(Name) != + Context->DefinedVariableTable.end()) + return FileCheckErrorDiagnostic::get( + SM, Name, "string variable with name '" + Name + "' already exists"); + + Expr = Expr.ltrim(SpaceChars); + if (!Expr.empty()) + return FileCheckErrorDiagnostic::get( + SM, Expr, "unexpected characters after numeric variable name"); + + FileCheckNumericVariable *DefinedNumericVariable; + auto VarTableIter = Context->GlobalNumericVariableTable.find(Name); + if (VarTableIter != Context->GlobalNumericVariableTable.end()) + DefinedNumericVariable = VarTableIter->second; + else + DefinedNumericVariable = Context->makeNumericVariable(Name, LineNumber); + + return DefinedNumericVariable; +} + +Expected> +FileCheckPattern::parseNumericVariableUse(StringRef Name, bool IsPseudo, + Optional LineNumber, + FileCheckPatternContext *Context, + const SourceMgr &SM) { + if (IsPseudo && !Name.equals("@LINE")) + return FileCheckErrorDiagnostic::get( + SM, Name, "invalid pseudo numeric variable '" + Name + "'"); + + // Numeric variable definitions and uses are parsed in the order in which + // they appear in the CHECK patterns. 
For each definition, the pointer to the + // class instance of the corresponding numeric variable definition is stored + // in GlobalNumericVariableTable in parsePattern. Therefore, if the pointer + // we get below is null, it means no such variable was defined before. When + // that happens, we create a dummy variable so that parsing can continue. All + // uses of undefined variables, whether string or numeric, are then diagnosed + // in printSubstitutions() after failing to match. + auto VarTableIter = Context->GlobalNumericVariableTable.find(Name); + FileCheckNumericVariable *NumericVariable; + if (VarTableIter != Context->GlobalNumericVariableTable.end()) + NumericVariable = VarTableIter->second; + else { + NumericVariable = Context->makeNumericVariable(Name); + Context->GlobalNumericVariableTable[Name] = NumericVariable; + } + + Optional DefLineNumber = NumericVariable->getDefLineNumber(); + if (DefLineNumber && LineNumber && *DefLineNumber == *LineNumber) + return FileCheckErrorDiagnostic::get( + SM, Name, + "numeric variable '" + Name + + "' defined earlier in the same CHECK directive"); + + return std::make_unique(Name, NumericVariable); +} + +Expected> +FileCheckPattern::parseNumericOperand(StringRef &Expr, AllowedOperand AO, + Optional LineNumber, + FileCheckPatternContext *Context, + const SourceMgr &SM) { + if (AO == AllowedOperand::LineVar || AO == AllowedOperand::Any) { + // Try to parse as a numeric variable use. + Expected ParseVarResult = + parseVariable(Expr, SM); + if (ParseVarResult) + return parseNumericVariableUse(ParseVarResult->Name, + ParseVarResult->IsPseudo, LineNumber, + Context, SM); + if (AO == AllowedOperand::LineVar) + return ParseVarResult.takeError(); + // Ignore the error and retry parsing as a literal. + consumeError(ParseVarResult.takeError()); + } + + // Otherwise, parse it as a literal. 
+ uint64_t LiteralValue; + if (!Expr.consumeInteger(/*Radix=*/10, LiteralValue)) + return std::make_unique(LiteralValue); + + return FileCheckErrorDiagnostic::get(SM, Expr, + "invalid operand format '" + Expr + "'"); +} + +static uint64_t add(uint64_t LeftOp, uint64_t RightOp) { + return LeftOp + RightOp; +} + +static uint64_t sub(uint64_t LeftOp, uint64_t RightOp) { + return LeftOp - RightOp; +} + +Expected> FileCheckPattern::parseBinop( + StringRef &Expr, std::unique_ptr LeftOp, + bool IsLegacyLineExpr, Optional LineNumber, + FileCheckPatternContext *Context, const SourceMgr &SM) { + Expr = Expr.ltrim(SpaceChars); + if (Expr.empty()) + return std::move(LeftOp); + + // Check if this is a supported operation and select a function to perform + // it. + SMLoc OpLoc = SMLoc::getFromPointer(Expr.data()); + char Operator = popFront(Expr); + binop_eval_t EvalBinop; + switch (Operator) { + case '+': + EvalBinop = add; + break; + case '-': + EvalBinop = sub; + break; + default: + return FileCheckErrorDiagnostic::get( + SM, OpLoc, Twine("unsupported operation '") + Twine(Operator) + "'"); + } + + // Parse right operand. + Expr = Expr.ltrim(SpaceChars); + if (Expr.empty()) + return FileCheckErrorDiagnostic::get(SM, Expr, + "missing operand in expression"); + // The second operand in a legacy @LINE expression is always a literal. + AllowedOperand AO = + IsLegacyLineExpr ? 
AllowedOperand::Literal : AllowedOperand::Any; + Expected> RightOpResult = + parseNumericOperand(Expr, AO, LineNumber, Context, SM); + if (!RightOpResult) + return RightOpResult; + + Expr = Expr.ltrim(SpaceChars); + return std::make_unique(EvalBinop, std::move(LeftOp), + std::move(*RightOpResult)); +} + +Expected> +FileCheckPattern::parseNumericSubstitutionBlock( + StringRef Expr, + Optional &DefinedNumericVariable, + bool IsLegacyLineExpr, Optional LineNumber, + FileCheckPatternContext *Context, const SourceMgr &SM) { + std::unique_ptr ExpressionAST = nullptr; + StringRef DefExpr = StringRef(); + DefinedNumericVariable = None; + // Save variable definition expression if any. + size_t DefEnd = Expr.find(':'); + if (DefEnd != StringRef::npos) { + DefExpr = Expr.substr(0, DefEnd); + Expr = Expr.substr(DefEnd + 1); + } + + // Parse the expression itself. + Expr = Expr.ltrim(SpaceChars); + if (!Expr.empty()) { + // The first operand in a legacy @LINE expression is always the @LINE + // pseudo variable. + AllowedOperand AO = + IsLegacyLineExpr ? AllowedOperand::LineVar : AllowedOperand::Any; + Expected> ParseResult = + parseNumericOperand(Expr, AO, LineNumber, Context, SM); + while (ParseResult && !Expr.empty()) { + ParseResult = parseBinop(Expr, std::move(*ParseResult), IsLegacyLineExpr, + LineNumber, Context, SM); + // Legacy @LINE expressions only allow 2 operands. + if (ParseResult && IsLegacyLineExpr && !Expr.empty()) + return FileCheckErrorDiagnostic::get( + SM, Expr, + "unexpected characters at end of expression '" + Expr + "'"); + } + if (!ParseResult) + return ParseResult; + ExpressionAST = std::move(*ParseResult); + } + + // Parse the numeric variable definition. 
+ if (DefEnd != StringRef::npos) { + DefExpr = DefExpr.ltrim(SpaceChars); + Expected ParseResult = + parseNumericVariableDefinition(DefExpr, Context, LineNumber, SM); + + if (!ParseResult) + return ParseResult.takeError(); + DefinedNumericVariable = *ParseResult; + } + + return std::move(ExpressionAST); +} + +bool FileCheckPattern::parsePattern(StringRef PatternStr, StringRef Prefix, + SourceMgr &SM, + const FileCheckRequest &Req) { + bool MatchFullLinesHere = Req.MatchFullLines && CheckTy != Check::CheckNot; + + PatternLoc = SMLoc::getFromPointer(PatternStr.data()); + + if (!(Req.NoCanonicalizeWhiteSpace && Req.MatchFullLines)) + // Ignore trailing whitespace. + while (!PatternStr.empty() && + (PatternStr.back() == ' ' || PatternStr.back() == '\t')) + PatternStr = PatternStr.substr(0, PatternStr.size() - 1); + + // Check that there is something on the line. + if (PatternStr.empty() && CheckTy != Check::CheckEmpty) { + SM.PrintMessage(PatternLoc, SourceMgr::DK_Error, + "found empty check string with prefix '" + Prefix + ":'"); + return true; + } + + if (!PatternStr.empty() && CheckTy == Check::CheckEmpty) { + SM.PrintMessage( + PatternLoc, SourceMgr::DK_Error, + "found non-empty check string for empty check with prefix '" + Prefix + + ":'"); + return true; + } + + if (CheckTy == Check::CheckEmpty) { + RegExStr = "(\n$)"; + return false; + } + + // Check to see if this is a fixed string, or if it has regex pieces. + if (!MatchFullLinesHere && + (PatternStr.size() < 2 || (PatternStr.find("{{") == StringRef::npos && + PatternStr.find("[[") == StringRef::npos))) { + FixedStr = PatternStr; + return false; + } + + if (MatchFullLinesHere) { + RegExStr += '^'; + if (!Req.NoCanonicalizeWhiteSpace) + RegExStr += " *"; + } + + // Paren value #0 is for the fully matched string. Any new parenthesized + // values add from there. + unsigned CurParen = 1; + + // Otherwise, there is at least one regex piece. 
Build up the regex pattern + // by escaping scary characters in fixed strings, building up one big regex. + while (!PatternStr.empty()) { + // RegEx matches. + if (PatternStr.startswith("{{")) { + // This is the start of a regex match. Scan for the }}. + size_t End = PatternStr.find("}}"); + if (End == StringRef::npos) { + SM.PrintMessage(SMLoc::getFromPointer(PatternStr.data()), + SourceMgr::DK_Error, + "found start of regex string with no end '}}'"); + return true; + } + + // Enclose {{}} patterns in parens just like [[]] even though we're not + // capturing the result for any purpose. This is required in case the + // expression contains an alternation like: CHECK: abc{{x|z}}def. We + // want this to turn into: "abc(x|z)def" not "abcx|zdef". + RegExStr += '('; + ++CurParen; + + if (AddRegExToRegEx(PatternStr.substr(2, End - 2), CurParen, SM)) + return true; + RegExStr += ')'; + + PatternStr = PatternStr.substr(End + 2); + continue; + } + + // String and numeric substitution blocks. Pattern substitution blocks come + // in two forms: [[foo:.*]] and [[foo]]. The former matches .* (or some + // other regex) and assigns it to the string variable 'foo'. The latter + // substitutes foo's value. Numeric substitution blocks recognize the same + // form as string ones, but start with a '#' sign after the double + // brackets. They also accept a combined form which sets a numeric variable + // to the evaluation of an expression. Both string and numeric variable + // names must satisfy the regular expression "[a-zA-Z_][0-9a-zA-Z_]*" to be + // valid, as this helps catch some common errors. + if (PatternStr.startswith("[[")) { + StringRef UnparsedPatternStr = PatternStr.substr(2); + // Find the closing bracket pair ending the match. End is going to be an + // offset relative to the beginning of the match string. 
+ size_t End = FindRegexVarEnd(UnparsedPatternStr, SM); + StringRef MatchStr = UnparsedPatternStr.substr(0, End); + bool IsNumBlock = MatchStr.consume_front("#"); + + if (End == StringRef::npos) { + SM.PrintMessage(SMLoc::getFromPointer(PatternStr.data()), + SourceMgr::DK_Error, + "Invalid substitution block, no ]] found"); + return true; + } + // Strip the substitution block we are parsing. End points to the start + // of the "]]" closing the expression so account for it in computing the + // index of the first unparsed character. + PatternStr = UnparsedPatternStr.substr(End + 2); + + bool IsDefinition = false; + bool SubstNeeded = false; + // Whether the substitution block is a legacy use of @LINE with string + // substitution block syntax. + bool IsLegacyLineExpr = false; + StringRef DefName; + StringRef SubstStr; + StringRef MatchRegexp; + size_t SubstInsertIdx = RegExStr.size(); + + // Parse string variable or legacy @LINE expression. + if (!IsNumBlock) { + size_t VarEndIdx = MatchStr.find(":"); + size_t SpacePos = MatchStr.substr(0, VarEndIdx).find_first_of(" \t"); + if (SpacePos != StringRef::npos) { + SM.PrintMessage(SMLoc::getFromPointer(MatchStr.data() + SpacePos), + SourceMgr::DK_Error, "unexpected whitespace"); + return true; + } + + // Get the name (e.g. "foo") and verify it is well formed. 
+ StringRef OrigMatchStr = MatchStr; + Expected ParseVarResult = + parseVariable(MatchStr, SM); + if (!ParseVarResult) { + logAllUnhandledErrors(ParseVarResult.takeError(), errs()); + return true; + } + StringRef Name = ParseVarResult->Name; + bool IsPseudo = ParseVarResult->IsPseudo; + + IsDefinition = (VarEndIdx != StringRef::npos); + SubstNeeded = !IsDefinition; + if (IsDefinition) { + if ((IsPseudo || !MatchStr.consume_front(":"))) { + SM.PrintMessage(SMLoc::getFromPointer(Name.data()), + SourceMgr::DK_Error, + "invalid name in string variable definition"); + return true; + } + + // Detect collisions between string and numeric variables when the + // former is created later than the latter. + if (Context->GlobalNumericVariableTable.find(Name) != + Context->GlobalNumericVariableTable.end()) { + SM.PrintMessage( + SMLoc::getFromPointer(Name.data()), SourceMgr::DK_Error, + "numeric variable with name '" + Name + "' already exists"); + return true; + } + DefName = Name; + MatchRegexp = MatchStr; + } else { + if (IsPseudo) { + MatchStr = OrigMatchStr; + IsLegacyLineExpr = IsNumBlock = true; + } else + SubstStr = Name; + } + } + + // Parse numeric substitution block. + std::unique_ptr ExpressionAST; + Optional DefinedNumericVariable; + if (IsNumBlock) { + Expected> ParseResult = + parseNumericSubstitutionBlock(MatchStr, DefinedNumericVariable, + IsLegacyLineExpr, LineNumber, Context, + SM); + if (!ParseResult) { + logAllUnhandledErrors(ParseResult.takeError(), errs()); + return true; + } + ExpressionAST = std::move(*ParseResult); + SubstNeeded = ExpressionAST != nullptr; + if (DefinedNumericVariable) { + IsDefinition = true; + DefName = (*DefinedNumericVariable)->getName(); + } + if (SubstNeeded) + SubstStr = MatchStr; + else + MatchRegexp = "[0-9]+"; + } + + // Handle variable definition: [[:(...)]] and [[#(...):(...)]]. 
+ if (IsDefinition) { + RegExStr += '('; + ++SubstInsertIdx; + + if (IsNumBlock) { + FileCheckNumericVariableMatch NumericVariableDefinition = { + *DefinedNumericVariable, CurParen}; + NumericVariableDefs[DefName] = NumericVariableDefinition; + // This store is done here rather than in match() to allow + // parseNumericVariableUse() to get the pointer to the class instance + // of the right variable definition corresponding to a given numeric + // variable use. + Context->GlobalNumericVariableTable[DefName] = + *DefinedNumericVariable; + } else { + VariableDefs[DefName] = CurParen; + // Mark string variable as defined to detect collisions between + // string and numeric variables in parseNumericVariableUse() and + // defineCmdlineVariables() when the latter is created later than the + // former. We cannot reuse GlobalVariableTable for this by populating + // it with an empty string since we would then lose the ability to + // detect the use of an undefined variable in match(). + Context->DefinedVariableTable[DefName] = true; + } + + ++CurParen; + } + + if (!MatchRegexp.empty() && AddRegExToRegEx(MatchRegexp, CurParen, SM)) + return true; + + if (IsDefinition) + RegExStr += ')'; + + // Handle substitutions: [[foo]] and [[#]]. + if (SubstNeeded) { + // Handle substitution of string variables that were defined earlier on + // the same line by emitting a backreference. Expressions do not + // support substituting a numeric variable defined on the same line. + if (!IsNumBlock && VariableDefs.find(SubstStr) != VariableDefs.end()) { + unsigned CaptureParenGroup = VariableDefs[SubstStr]; + if (CaptureParenGroup < 1 || CaptureParenGroup > 9) { + SM.PrintMessage(SMLoc::getFromPointer(SubstStr.data()), + SourceMgr::DK_Error, + "Can't back-reference more than 9 variables"); + return true; + } + AddBackrefToRegEx(CaptureParenGroup); + } else { + // Handle substitution of string variables ([[]]) defined in + // previous CHECK patterns, and substitution of expressions. 
+ FileCheckSubstitution *Substitution = + IsNumBlock + ? Context->makeNumericSubstitution( + SubstStr, std::move(ExpressionAST), SubstInsertIdx) + : Context->makeStringSubstitution(SubstStr, SubstInsertIdx); + Substitutions.push_back(Substitution); + } + } + } + + // Handle fixed string matches. + // Find the end, which is the start of the next regex. + size_t FixedMatchEnd = PatternStr.find("{{"); + FixedMatchEnd = std::min(FixedMatchEnd, PatternStr.find("[[")); + RegExStr += Regex::escape(PatternStr.substr(0, FixedMatchEnd)); + PatternStr = PatternStr.substr(FixedMatchEnd); + } + + if (MatchFullLinesHere) { + if (!Req.NoCanonicalizeWhiteSpace) + RegExStr += " *"; + RegExStr += '$'; + } + + return false; +} + +bool FileCheckPattern::AddRegExToRegEx(StringRef RS, unsigned &CurParen, SourceMgr &SM) { + Regex R(RS); + std::string Error; + if (!R.isValid(Error)) { + SM.PrintMessage(SMLoc::getFromPointer(RS.data()), SourceMgr::DK_Error, + "invalid regex: " + Error); + return true; + } + + RegExStr += RS.str(); + CurParen += R.getNumMatches(); + return false; +} + +void FileCheckPattern::AddBackrefToRegEx(unsigned BackrefNum) { + assert(BackrefNum >= 1 && BackrefNum <= 9 && "Invalid backref number"); + std::string Backref = std::string("\\") + std::string(1, '0' + BackrefNum); + RegExStr += Backref; +} + +Expected FileCheckPattern::match(StringRef Buffer, size_t &MatchLen, + const SourceMgr &SM) const { + // If this is the EOF pattern, match it immediately. + if (CheckTy == Check::CheckEOF) { + MatchLen = 0; + return Buffer.size(); + } + + // If this is a fixed string pattern, just match it now. + if (!FixedStr.empty()) { + MatchLen = FixedStr.size(); + size_t Pos = Buffer.find(FixedStr); + if (Pos == StringRef::npos) + return make_error(); + return Pos; + } + + // Regex match. + + // If there are substitutions, we need to create a temporary string with the + // actual value. 
+ StringRef RegExToMatch = RegExStr; + std::string TmpStr; + if (!Substitutions.empty()) { + TmpStr = RegExStr; + if (LineNumber) + Context->LineVariable->setValue(*LineNumber); + + size_t InsertOffset = 0; + // Substitute all string variables and expressions whose values are only + // now known. Use of string variables defined on the same line are handled + // by back-references. + for (const auto &Substitution : Substitutions) { + // Substitute and check for failure (e.g. use of undefined variable). + Expected Value = Substitution->getResult(); + if (!Value) + return Value.takeError(); + + // Plop it into the regex at the adjusted offset. + TmpStr.insert(TmpStr.begin() + Substitution->getIndex() + InsertOffset, + Value->begin(), Value->end()); + InsertOffset += Value->size(); + } + + // Match the newly constructed regex. + RegExToMatch = TmpStr; + } + + SmallVector MatchInfo; + if (!Regex(RegExToMatch, Regex::Newline).match(Buffer, &MatchInfo)) + return make_error(); + + // Successful regex match. + assert(!MatchInfo.empty() && "Didn't get any match"); + StringRef FullMatch = MatchInfo[0]; + + // If this defines any string variables, remember their values. + for (const auto &VariableDef : VariableDefs) { + assert(VariableDef.second < MatchInfo.size() && "Internal paren error"); + Context->GlobalVariableTable[VariableDef.first] = + MatchInfo[VariableDef.second]; + } + + // If this defines any numeric variables, remember their values. 
+ for (const auto &NumericVariableDef : NumericVariableDefs) { + const FileCheckNumericVariableMatch &NumericVariableMatch = + NumericVariableDef.getValue(); + unsigned CaptureParenGroup = NumericVariableMatch.CaptureParenGroup; + assert(CaptureParenGroup < MatchInfo.size() && "Internal paren error"); + FileCheckNumericVariable *DefinedNumericVariable = + NumericVariableMatch.DefinedNumericVariable; + + StringRef MatchedValue = MatchInfo[CaptureParenGroup]; + uint64_t Val; + if (MatchedValue.getAsInteger(10, Val)) + return FileCheckErrorDiagnostic::get(SM, MatchedValue, + "Unable to represent numeric value"); + DefinedNumericVariable->setValue(Val); + } + + // Like CHECK-NEXT, CHECK-EMPTY's match range is considered to start after + // the required preceding newline, which is consumed by the pattern in the + // case of CHECK-EMPTY but not CHECK-NEXT. + size_t MatchStartSkip = CheckTy == Check::CheckEmpty; + MatchLen = FullMatch.size() - MatchStartSkip; + return FullMatch.data() - Buffer.data() + MatchStartSkip; +} + +unsigned FileCheckPattern::computeMatchDistance(StringRef Buffer) const { + // Just compute the number of matching characters. For regular expressions, we + // just compare against the regex itself and hope for the best. + // + // FIXME: One easy improvement here is have the regex lib generate a single + // example regular expression which matches, and use that as the example + // string. + StringRef ExampleString(FixedStr); + if (ExampleString.empty()) + ExampleString = RegExStr; + + // Only compare up to the first line in the buffer, or the string size. + StringRef BufferPrefix = Buffer.substr(0, ExampleString.size()); + BufferPrefix = BufferPrefix.split('\n').first; + return BufferPrefix.edit_distance(ExampleString); +} + +void FileCheckPattern::printSubstitutions(const SourceMgr &SM, StringRef Buffer, + SMRange MatchRange) const { + // Print what we know about substitutions. 
+ if (!Substitutions.empty()) { + for (const auto &Substitution : Substitutions) { + SmallString<256> Msg; + raw_svector_ostream OS(Msg); + Expected MatchedValue = Substitution->getResult(); + + // Substitution failed or is not known at match time, print the undefined + // variables it uses. + if (!MatchedValue) { + bool UndefSeen = false; + handleAllErrors(MatchedValue.takeError(), + [](const FileCheckNotFoundError &E) {}, + // Handled in PrintNoMatch(). + [](const FileCheckErrorDiagnostic &E) {}, + [&](const FileCheckUndefVarError &E) { + if (!UndefSeen) { + OS << "uses undefined variable(s):"; + UndefSeen = true; + } + OS << " "; + E.log(OS); + }); + } else { + // Substitution succeeded. Print substituted value. + OS << "with \""; + OS.write_escaped(Substitution->getFromString()) << "\" equal to \""; + OS.write_escaped(*MatchedValue) << "\""; + } + + if (MatchRange.isValid()) + SM.PrintMessage(MatchRange.Start, SourceMgr::DK_Note, OS.str(), + {MatchRange}); + else + SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), + SourceMgr::DK_Note, OS.str()); + } + } +} + +static SMRange ProcessMatchResult(FileCheckDiag::MatchType MatchTy, + const SourceMgr &SM, SMLoc Loc, + Check::FileCheckType CheckTy, + StringRef Buffer, size_t Pos, size_t Len, + std::vector *Diags, + bool AdjustPrevDiag = false) { + SMLoc Start = SMLoc::getFromPointer(Buffer.data() + Pos); + SMLoc End = SMLoc::getFromPointer(Buffer.data() + Pos + Len); + SMRange Range(Start, End); + if (Diags) { + if (AdjustPrevDiag) + Diags->rbegin()->MatchTy = MatchTy; + else + Diags->emplace_back(SM, CheckTy, Loc, MatchTy, Range); + } + return Range; +} + +void FileCheckPattern::printFuzzyMatch( + const SourceMgr &SM, StringRef Buffer, + std::vector *Diags) const { + // Attempt to find the closest/best fuzzy match. Usually an error happens + // because some string in the output didn't exactly match. 
In these cases, we + // would like to show the user a best guess at what "should have" matched, to + // save them having to actually check the input manually. + size_t NumLinesForward = 0; + size_t Best = StringRef::npos; + double BestQuality = 0; + + // Use an arbitrary 4k limit on how far we will search. + for (size_t i = 0, e = std::min(size_t(4096), Buffer.size()); i != e; ++i) { + if (Buffer[i] == '\n') + ++NumLinesForward; + + // Patterns have leading whitespace stripped, so skip whitespace when + // looking for something which looks like a pattern. + if (Buffer[i] == ' ' || Buffer[i] == '\t') + continue; + + // Compute the "quality" of this match as an arbitrary combination of the + // match distance and the number of lines skipped to get to this match. + unsigned Distance = computeMatchDistance(Buffer.substr(i)); + double Quality = Distance + (NumLinesForward / 100.); + + if (Quality < BestQuality || Best == StringRef::npos) { + Best = i; + BestQuality = Quality; + } + } + + // Print the "possible intended match here" line if we found something + // reasonable and not equal to what we showed in the "scanning from here" + // line. + if (Best && Best != StringRef::npos && BestQuality < 50) { + SMRange MatchRange = + ProcessMatchResult(FileCheckDiag::MatchFuzzy, SM, getLoc(), + getCheckTy(), Buffer, Best, 0, Diags); + SM.PrintMessage(MatchRange.Start, SourceMgr::DK_Note, + "possible intended match here"); + + // FIXME: If we wanted to be really friendly we would show why the match + // failed, as it can be hard to spot simple one character differences. + } +} + +Expected +FileCheckPatternContext::getPatternVarValue(StringRef VarName) { + auto VarIter = GlobalVariableTable.find(VarName); + if (VarIter == GlobalVariableTable.end()) + return make_error(VarName); + + return VarIter->second; +} + +template +FileCheckNumericVariable * +FileCheckPatternContext::makeNumericVariable(Types... 
args) { + NumericVariables.push_back( + std::make_unique(args...)); + return NumericVariables.back().get(); +} + +FileCheckSubstitution * +FileCheckPatternContext::makeStringSubstitution(StringRef VarName, + size_t InsertIdx) { + Substitutions.push_back( + std::make_unique(this, VarName, InsertIdx)); + return Substitutions.back().get(); +} + +FileCheckSubstitution *FileCheckPatternContext::makeNumericSubstitution( + StringRef ExpressionStr, + std::unique_ptr ExpressionAST, size_t InsertIdx) { + Substitutions.push_back(std::make_unique( + this, ExpressionStr, std::move(ExpressionAST), InsertIdx)); + return Substitutions.back().get(); +} + +size_t FileCheckPattern::FindRegexVarEnd(StringRef Str, SourceMgr &SM) { + // Offset keeps track of the current offset within the input Str + size_t Offset = 0; + // [...] Nesting depth + size_t BracketDepth = 0; + + while (!Str.empty()) { + if (Str.startswith("]]") && BracketDepth == 0) + return Offset; + if (Str[0] == '\\') { + // Backslash escapes the next char within regexes, so skip them both. + Str = Str.substr(2); + Offset += 2; + } else { + switch (Str[0]) { + default: + break; + case '[': + BracketDepth++; + break; + case ']': + if (BracketDepth == 0) { + SM.PrintMessage(SMLoc::getFromPointer(Str.data()), + SourceMgr::DK_Error, + "missing closing \"]\" for regex variable"); + exit(1); + } + BracketDepth--; + break; + } + Str = Str.substr(1); + Offset++; + } + } + + return StringRef::npos; +} + +StringRef FileCheck::CanonicalizeFile(MemoryBuffer &MB, + SmallVectorImpl &OutputBuffer) { + OutputBuffer.reserve(MB.getBufferSize()); + + for (const char *Ptr = MB.getBufferStart(), *End = MB.getBufferEnd(); + Ptr != End; ++Ptr) { + // Eliminate trailing dosish \r. + if (Ptr <= End - 2 && Ptr[0] == '\r' && Ptr[1] == '\n') { + continue; + } + + // If current char is not a horizontal whitespace or if horizontal + // whitespace canonicalization is disabled, dump it to output as is. 
+ if (Req.NoCanonicalizeWhiteSpace || (*Ptr != ' ' && *Ptr != '\t')) { + OutputBuffer.push_back(*Ptr); + continue; + } + + // Otherwise, add one space and advance over neighboring space. + OutputBuffer.push_back(' '); + while (Ptr + 1 != End && (Ptr[1] == ' ' || Ptr[1] == '\t')) + ++Ptr; + } + + // Add a null byte and then return all but that byte. + OutputBuffer.push_back('\0'); + return StringRef(OutputBuffer.data(), OutputBuffer.size() - 1); +} + +FileCheckDiag::FileCheckDiag(const SourceMgr &SM, + const Check::FileCheckType &CheckTy, + SMLoc CheckLoc, MatchType MatchTy, + SMRange InputRange) + : CheckTy(CheckTy), MatchTy(MatchTy) { + auto Start = SM.getLineAndColumn(InputRange.Start); + auto End = SM.getLineAndColumn(InputRange.End); + InputStartLine = Start.first; + InputStartCol = Start.second; + InputEndLine = End.first; + InputEndCol = End.second; + Start = SM.getLineAndColumn(CheckLoc); + CheckLine = Start.first; + CheckCol = Start.second; +} + +static bool IsPartOfWord(char c) { + return (isalnum(c) || c == '-' || c == '_'); +} + +Check::FileCheckType &Check::FileCheckType::setCount(int C) { + assert(Count > 0 && "zero and negative counts are not supported"); + assert((C == 1 || Kind == CheckPlain) && + "count supported only for plain CHECK directives"); + Count = C; + return *this; +} + +std::string Check::FileCheckType::getDescription(StringRef Prefix) const { + switch (Kind) { + case Check::CheckNone: + return "invalid"; + case Check::CheckPlain: + if (Count > 1) + return Prefix.str() + "-COUNT"; + return Prefix; + case Check::CheckNext: + return Prefix.str() + "-NEXT"; + case Check::CheckSame: + return Prefix.str() + "-SAME"; + case Check::CheckNot: + return Prefix.str() + "-NOT"; + case Check::CheckDAG: + return Prefix.str() + "-DAG"; + case Check::CheckLabel: + return Prefix.str() + "-LABEL"; + case Check::CheckEmpty: + return Prefix.str() + "-EMPTY"; + case Check::CheckEOF: + return "implicit EOF"; + case Check::CheckBadNot: + return "bad NOT"; + 
case Check::CheckBadCount: + return "bad COUNT"; + } + llvm_unreachable("unknown FileCheckType"); +} + +static std::pair +FindCheckType(StringRef Buffer, StringRef Prefix) { + if (Buffer.size() <= Prefix.size()) + return {Check::CheckNone, StringRef()}; + + char NextChar = Buffer[Prefix.size()]; + + StringRef Rest = Buffer.drop_front(Prefix.size() + 1); + // Verify that the : is present after the prefix. + if (NextChar == ':') + return {Check::CheckPlain, Rest}; + + if (NextChar != '-') + return {Check::CheckNone, StringRef()}; + + if (Rest.consume_front("COUNT-")) { + int64_t Count; + if (Rest.consumeInteger(10, Count)) + // Error happened in parsing integer. + return {Check::CheckBadCount, Rest}; + if (Count <= 0 || Count > INT32_MAX) + return {Check::CheckBadCount, Rest}; + if (!Rest.consume_front(":")) + return {Check::CheckBadCount, Rest}; + return {Check::FileCheckType(Check::CheckPlain).setCount(Count), Rest}; + } + + if (Rest.consume_front("NEXT:")) + return {Check::CheckNext, Rest}; + + if (Rest.consume_front("SAME:")) + return {Check::CheckSame, Rest}; + + if (Rest.consume_front("NOT:")) + return {Check::CheckNot, Rest}; + + if (Rest.consume_front("DAG:")) + return {Check::CheckDAG, Rest}; + + if (Rest.consume_front("LABEL:")) + return {Check::CheckLabel, Rest}; + + if (Rest.consume_front("EMPTY:")) + return {Check::CheckEmpty, Rest}; + + // You can't combine -NOT with another suffix. + if (Rest.startswith("DAG-NOT:") || Rest.startswith("NOT-DAG:") || + Rest.startswith("NEXT-NOT:") || Rest.startswith("NOT-NEXT:") || + Rest.startswith("SAME-NOT:") || Rest.startswith("NOT-SAME:") || + Rest.startswith("EMPTY-NOT:") || Rest.startswith("NOT-EMPTY:")) + return {Check::CheckBadNot, Rest}; + + return {Check::CheckNone, Rest}; +} + +// From the given position, find the next character after the word. 
+static size_t SkipWord(StringRef Str, size_t Loc) { + while (Loc < Str.size() && IsPartOfWord(Str[Loc])) + ++Loc; + return Loc; +} + +/// Searches the buffer for the first prefix in the prefix regular expression. +/// +/// This searches the buffer using the provided regular expression, however it +/// enforces constraints beyond that: +/// 1) The found prefix must not be a suffix of something that looks like +/// a valid prefix. +/// 2) The found prefix must be followed by a valid check type suffix using \c +/// FindCheckType above. +/// +/// \returns a pair of StringRefs into the Buffer, which combines: +/// - the first match of the regular expression to satisfy these two is +/// returned, +/// otherwise an empty StringRef is returned to indicate failure. +/// - buffer rewound to the location right after parsed suffix, for parsing +/// to continue from +/// +/// If this routine returns a valid prefix, it will also shrink \p Buffer to +/// start at the beginning of the returned prefix, increment \p LineNumber for +/// each new line consumed from \p Buffer, and set \p CheckTy to the type of +/// check found by examining the suffix. +/// +/// If no valid prefix is found, the state of Buffer, LineNumber, and CheckTy +/// is unspecified. +static std::pair +FindFirstMatchingPrefix(Regex &PrefixRE, StringRef &Buffer, + unsigned &LineNumber, Check::FileCheckType &CheckTy) { + SmallVector Matches; + + while (!Buffer.empty()) { + // Find the first (longest) match using the RE. + if (!PrefixRE.match(Buffer, &Matches)) + // No match at all, bail. 
+ return {StringRef(), StringRef()}; + + StringRef Prefix = Matches[0]; + Matches.clear(); + + assert(Prefix.data() >= Buffer.data() && + Prefix.data() < Buffer.data() + Buffer.size() && + "Prefix doesn't start inside of buffer!"); + size_t Loc = Prefix.data() - Buffer.data(); + StringRef Skipped = Buffer.substr(0, Loc); + Buffer = Buffer.drop_front(Loc); + LineNumber += Skipped.count('\n'); + + // Check that the matched prefix isn't a suffix of some other check-like + // word. + // FIXME: This is a very ad-hoc check. it would be better handled in some + // other way. Among other things it seems hard to distinguish between + // intentional and unintentional uses of this feature. + if (Skipped.empty() || !IsPartOfWord(Skipped.back())) { + // Now extract the type. + StringRef AfterSuffix; + std::tie(CheckTy, AfterSuffix) = FindCheckType(Buffer, Prefix); + + // If we've found a valid check type for this prefix, we're done. + if (CheckTy != Check::CheckNone) + return {Prefix, AfterSuffix}; + } + + // If we didn't successfully find a prefix, we need to skip this invalid + // prefix and continue scanning. We directly skip the prefix that was + // matched and any additional parts of that check-like word. + Buffer = Buffer.drop_front(SkipWord(Buffer, Prefix.size())); + } + + // We ran out of buffer while skipping partial matches so give up. 
+ return {StringRef(), StringRef()}; +} + +void FileCheckPatternContext::createLineVariable() { + assert(!LineVariable && "@LINE pseudo numeric variable already created"); + StringRef LineName = "@LINE"; + LineVariable = makeNumericVariable(LineName); + GlobalNumericVariableTable[LineName] = LineVariable; +} + +FileCheck::FileCheck(FileCheckRequest Req) + : Req(Req), PatternContext(std::make_unique()), + CheckStrings(std::make_unique>()) {} + +FileCheck::~FileCheck() = default; + +bool FileCheck::readCheckFile(SourceMgr &SM, StringRef Buffer, + Regex &PrefixRE) { + Error DefineError = + PatternContext->defineCmdlineVariables(Req.GlobalDefines, SM); + if (DefineError) { + logAllUnhandledErrors(std::move(DefineError), errs()); + return true; + } + + PatternContext->createLineVariable(); + + std::vector ImplicitNegativeChecks; + for (const auto &PatternString : Req.ImplicitCheckNot) { + // Create a buffer with fake command line content in order to display the + // command line option responsible for the specific implicit CHECK-NOT. + std::string Prefix = "-implicit-check-not='"; + std::string Suffix = "'"; + std::unique_ptr CmdLine = MemoryBuffer::getMemBufferCopy( + Prefix + PatternString + Suffix, "command line"); + + StringRef PatternInBuffer = + CmdLine->getBuffer().substr(Prefix.size(), PatternString.size()); + SM.AddNewSourceBuffer(std::move(CmdLine), SMLoc()); + + ImplicitNegativeChecks.push_back( + FileCheckPattern(Check::CheckNot, PatternContext.get())); + ImplicitNegativeChecks.back().parsePattern(PatternInBuffer, + "IMPLICIT-CHECK", SM, Req); + } + + std::vector DagNotMatches = ImplicitNegativeChecks; + + // LineNumber keeps track of the line on which CheckPrefix instances are + // found. + unsigned LineNumber = 1; + + while (1) { + Check::FileCheckType CheckTy; + + // See if a prefix occurs in the memory buffer. 
+ StringRef UsedPrefix; + StringRef AfterSuffix; + std::tie(UsedPrefix, AfterSuffix) = + FindFirstMatchingPrefix(PrefixRE, Buffer, LineNumber, CheckTy); + if (UsedPrefix.empty()) + break; + assert(UsedPrefix.data() == Buffer.data() && + "Failed to move Buffer's start forward, or pointed prefix outside " + "of the buffer!"); + assert(AfterSuffix.data() >= Buffer.data() && + AfterSuffix.data() < Buffer.data() + Buffer.size() && + "Parsing after suffix doesn't start inside of buffer!"); + + // Location to use for error messages. + const char *UsedPrefixStart = UsedPrefix.data(); + + // Skip the buffer to the end of parsed suffix (or just prefix, if no good + // suffix was processed). + Buffer = AfterSuffix.empty() ? Buffer.drop_front(UsedPrefix.size()) + : AfterSuffix; + + // Complain about useful-looking but unsupported suffixes. + if (CheckTy == Check::CheckBadNot) { + SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Error, + "unsupported -NOT combo on prefix '" + UsedPrefix + "'"); + return true; + } + + // Complain about invalid count specification. + if (CheckTy == Check::CheckBadCount) { + SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Error, + "invalid count in -COUNT specification on prefix '" + + UsedPrefix + "'"); + return true; + } + + // Okay, we found the prefix, yay. Remember the rest of the line, but ignore + // leading whitespace. + if (!(Req.NoCanonicalizeWhiteSpace && Req.MatchFullLines)) + Buffer = Buffer.substr(Buffer.find_first_not_of(" \t")); + + // Scan ahead to the end of line. + size_t EOL = Buffer.find_first_of("\n\r"); + + // Remember the location of the start of the pattern, for diagnostics. + SMLoc PatternLoc = SMLoc::getFromPointer(Buffer.data()); + + // Parse the pattern. 
+ FileCheckPattern P(CheckTy, PatternContext.get(), LineNumber); + if (P.parsePattern(Buffer.substr(0, EOL), UsedPrefix, SM, Req)) + return true; + + // Verify that CHECK-LABEL lines do not define or use variables + if ((CheckTy == Check::CheckLabel) && P.hasVariable()) { + SM.PrintMessage( + SMLoc::getFromPointer(UsedPrefixStart), SourceMgr::DK_Error, + "found '" + UsedPrefix + "-LABEL:'" + " with variable definition or use"); + return true; + } + + Buffer = Buffer.substr(EOL); + + // Verify that CHECK-NEXT/SAME/EMPTY lines have at least one CHECK line before them. + if ((CheckTy == Check::CheckNext || CheckTy == Check::CheckSame || + CheckTy == Check::CheckEmpty) && + CheckStrings->empty()) { + StringRef Type = CheckTy == Check::CheckNext + ? "NEXT" + : CheckTy == Check::CheckEmpty ? "EMPTY" : "SAME"; + SM.PrintMessage(SMLoc::getFromPointer(UsedPrefixStart), + SourceMgr::DK_Error, + "found '" + UsedPrefix + "-" + Type + + "' without previous '" + UsedPrefix + ": line"); + return true; + } + + // Handle CHECK-DAG/-NOT. + if (CheckTy == Check::CheckDAG || CheckTy == Check::CheckNot) { + DagNotMatches.push_back(P); + continue; + } + + // Okay, add the string we captured to the output vector and move on. + CheckStrings->emplace_back(P, UsedPrefix, PatternLoc); + std::swap(DagNotMatches, CheckStrings->back().DagNotStrings); + DagNotMatches = ImplicitNegativeChecks; + } + + // Add an EOF pattern for any trailing CHECK-DAG/-NOTs, and use the first + // prefix as a filler for the error message. + if (!DagNotMatches.empty()) { + CheckStrings->emplace_back( + FileCheckPattern(Check::CheckEOF, PatternContext.get(), LineNumber + 1), + *Req.CheckPrefixes.begin(), SMLoc::getFromPointer(Buffer.data())); + std::swap(DagNotMatches, CheckStrings->back().DagNotStrings); + } + + if (CheckStrings->empty()) { + errs() << "error: no check strings found with prefix" + << (Req.CheckPrefixes.size() > 1 ? 
"es " : " "); + auto I = Req.CheckPrefixes.begin(); + auto E = Req.CheckPrefixes.end(); + if (I != E) { + errs() << "\'" << *I << ":'"; + ++I; + } + for (; I != E; ++I) + errs() << ", \'" << *I << ":'"; + + errs() << '\n'; + return true; + } + + return false; +} + +static void PrintMatch(bool ExpectedMatch, const SourceMgr &SM, + StringRef Prefix, SMLoc Loc, const FileCheckPattern &Pat, + int MatchedCount, StringRef Buffer, size_t MatchPos, + size_t MatchLen, const FileCheckRequest &Req, + std::vector *Diags) { + bool PrintDiag = true; + if (ExpectedMatch) { + if (!Req.Verbose) + return; + if (!Req.VerboseVerbose && Pat.getCheckTy() == Check::CheckEOF) + return; + // Due to their verbosity, we don't print verbose diagnostics here if we're + // gathering them for a different rendering, but we always print other + // diagnostics. + PrintDiag = !Diags; + } + SMRange MatchRange = ProcessMatchResult( + ExpectedMatch ? FileCheckDiag::MatchFoundAndExpected + : FileCheckDiag::MatchFoundButExcluded, + SM, Loc, Pat.getCheckTy(), Buffer, MatchPos, MatchLen, Diags); + if (!PrintDiag) + return; + + std::string Message = formatv("{0}: {1} string found in input", + Pat.getCheckTy().getDescription(Prefix), + (ExpectedMatch ? "expected" : "excluded")) + .str(); + if (Pat.getCount() > 1) + Message += formatv(" ({0} out of {1})", MatchedCount, Pat.getCount()).str(); + + SM.PrintMessage( + Loc, ExpectedMatch ? 
SourceMgr::DK_Remark : SourceMgr::DK_Error, Message); + SM.PrintMessage(MatchRange.Start, SourceMgr::DK_Note, "found here", + {MatchRange}); + Pat.printSubstitutions(SM, Buffer, MatchRange); +} + +static void PrintMatch(bool ExpectedMatch, const SourceMgr &SM, + const FileCheckString &CheckStr, int MatchedCount, + StringRef Buffer, size_t MatchPos, size_t MatchLen, + FileCheckRequest &Req, + std::vector *Diags) { + PrintMatch(ExpectedMatch, SM, CheckStr.Prefix, CheckStr.Loc, CheckStr.Pat, + MatchedCount, Buffer, MatchPos, MatchLen, Req, Diags); +} + +static void PrintNoMatch(bool ExpectedMatch, const SourceMgr &SM, + StringRef Prefix, SMLoc Loc, + const FileCheckPattern &Pat, int MatchedCount, + StringRef Buffer, bool VerboseVerbose, + std::vector *Diags, Error MatchErrors) { + assert(MatchErrors && "Called on successful match"); + bool PrintDiag = true; + if (!ExpectedMatch) { + if (!VerboseVerbose) { + consumeError(std::move(MatchErrors)); + return; + } + // Due to their verbosity, we don't print verbose diagnostics here if we're + // gathering them for a different rendering, but we always print other + // diagnostics. + PrintDiag = !Diags; + } + + // If the current position is at the end of a line, advance to the start of + // the next line. + Buffer = Buffer.substr(Buffer.find_first_not_of(" \t\n\r")); + SMRange SearchRange = ProcessMatchResult( + ExpectedMatch ? FileCheckDiag::MatchNoneButExpected + : FileCheckDiag::MatchNoneAndExcluded, + SM, Loc, Pat.getCheckTy(), Buffer, 0, Buffer.size(), Diags); + if (!PrintDiag) { + consumeError(std::move(MatchErrors)); + return; + } + + MatchErrors = + handleErrors(std::move(MatchErrors), + [](const FileCheckErrorDiagnostic &E) { E.log(errs()); }); + + // No problem matching the string per se. + if (!MatchErrors) + return; + consumeError(std::move(MatchErrors)); + + // Print "not found" diagnostic. 
+ std::string Message = formatv("{0}: {1} string not found in input", + Pat.getCheckTy().getDescription(Prefix), + (ExpectedMatch ? "expected" : "excluded")) + .str(); + if (Pat.getCount() > 1) + Message += formatv(" ({0} out of {1})", MatchedCount, Pat.getCount()).str(); + SM.PrintMessage( + Loc, ExpectedMatch ? SourceMgr::DK_Error : SourceMgr::DK_Remark, Message); + + // Print the "scanning from here" line. + SM.PrintMessage(SearchRange.Start, SourceMgr::DK_Note, "scanning from here"); + + // Allow the pattern to print additional information if desired. + Pat.printSubstitutions(SM, Buffer); + + if (ExpectedMatch) + Pat.printFuzzyMatch(SM, Buffer, Diags); +} + +static void PrintNoMatch(bool ExpectedMatch, const SourceMgr &SM, + const FileCheckString &CheckStr, int MatchedCount, + StringRef Buffer, bool VerboseVerbose, + std::vector *Diags, Error MatchErrors) { + PrintNoMatch(ExpectedMatch, SM, CheckStr.Prefix, CheckStr.Loc, CheckStr.Pat, + MatchedCount, Buffer, VerboseVerbose, Diags, + std::move(MatchErrors)); +} + +/// Counts the number of newlines in the specified range. +static unsigned CountNumNewlinesBetween(StringRef Range, + const char *&FirstNewLine) { + unsigned NumNewLines = 0; + while (1) { + // Scan for newline. + Range = Range.substr(Range.find_first_of("\n\r")); + if (Range.empty()) + return NumNewLines; + + ++NumNewLines; + + // Handle \n\r and \r\n as a single newline. 
+ if (Range.size() > 1 && (Range[1] == '\n' || Range[1] == '\r') && + (Range[0] != Range[1])) + Range = Range.substr(1); + Range = Range.substr(1); + + if (NumNewLines == 1) + FirstNewLine = Range.begin(); + } +} + +size_t FileCheckString::Check(const SourceMgr &SM, StringRef Buffer, + bool IsLabelScanMode, size_t &MatchLen, + FileCheckRequest &Req, + std::vector *Diags) const { + size_t LastPos = 0; + std::vector NotStrings; + + // IsLabelScanMode is true when we are scanning forward to find CHECK-LABEL + // bounds; we have not processed variable definitions within the bounded block + // yet so cannot handle any final CHECK-DAG yet; this is handled when going + // over the block again (including the last CHECK-LABEL) in normal mode. + if (!IsLabelScanMode) { + // Match "dag strings" (with mixed "not strings" if any). + LastPos = CheckDag(SM, Buffer, NotStrings, Req, Diags); + if (LastPos == StringRef::npos) + return StringRef::npos; + } + + // Match itself from the last position after matching CHECK-DAG. + size_t LastMatchEnd = LastPos; + size_t FirstMatchPos = 0; + // Go match the pattern Count times. Majority of patterns only match with + // count 1 though. + assert(Pat.getCount() != 0 && "pattern count can not be zero"); + for (int i = 1; i <= Pat.getCount(); i++) { + StringRef MatchBuffer = Buffer.substr(LastMatchEnd); + size_t CurrentMatchLen; + // get a match at current start point + Expected MatchResult = Pat.match(MatchBuffer, CurrentMatchLen, SM); + + // report + if (!MatchResult) { + PrintNoMatch(true, SM, *this, i, MatchBuffer, Req.VerboseVerbose, Diags, + MatchResult.takeError()); + return StringRef::npos; + } + size_t MatchPos = *MatchResult; + PrintMatch(true, SM, *this, i, MatchBuffer, MatchPos, CurrentMatchLen, Req, + Diags); + if (i == 1) + FirstMatchPos = LastPos + MatchPos; + + // move start point after the match + LastMatchEnd += MatchPos + CurrentMatchLen; + } + // Full match len counts from first match pos. 
+ MatchLen = LastMatchEnd - FirstMatchPos; + + // Similar to the above, in "label-scan mode" we can't yet handle CHECK-NEXT + // or CHECK-NOT + if (!IsLabelScanMode) { + size_t MatchPos = FirstMatchPos - LastPos; + StringRef MatchBuffer = Buffer.substr(LastPos); + StringRef SkippedRegion = Buffer.substr(LastPos, MatchPos); + + // If this check is a "CHECK-NEXT", verify that the previous match was on + // the previous line (i.e. that there is one newline between them). + if (CheckNext(SM, SkippedRegion)) { + ProcessMatchResult(FileCheckDiag::MatchFoundButWrongLine, SM, Loc, + Pat.getCheckTy(), MatchBuffer, MatchPos, MatchLen, + Diags, Req.Verbose); + return StringRef::npos; + } + + // If this check is a "CHECK-SAME", verify that the previous match was on + // the same line (i.e. that there is no newline between them). + if (CheckSame(SM, SkippedRegion)) { + ProcessMatchResult(FileCheckDiag::MatchFoundButWrongLine, SM, Loc, + Pat.getCheckTy(), MatchBuffer, MatchPos, MatchLen, + Diags, Req.Verbose); + return StringRef::npos; + } + + // If this match had "not strings", verify that they don't exist in the + // skipped region. + if (CheckNot(SM, SkippedRegion, NotStrings, Req, Diags)) + return StringRef::npos; + } + + return FirstMatchPos; +} + +bool FileCheckString::CheckNext(const SourceMgr &SM, StringRef Buffer) const { + if (Pat.getCheckTy() != Check::CheckNext && + Pat.getCheckTy() != Check::CheckEmpty) + return false; + + Twine CheckName = + Prefix + + Twine(Pat.getCheckTy() == Check::CheckEmpty ? "-EMPTY" : "-NEXT"); + + // Count the number of newlines between the previous match and this one. 
+ const char *FirstNewLine = nullptr; + unsigned NumNewLines = CountNumNewlinesBetween(Buffer, FirstNewLine); + + if (NumNewLines == 0) { + SM.PrintMessage(Loc, SourceMgr::DK_Error, + CheckName + ": is on the same line as previous match"); + SM.PrintMessage(SMLoc::getFromPointer(Buffer.end()), SourceMgr::DK_Note, + "'next' match was here"); + SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note, + "previous match ended here"); + return true; + } + + if (NumNewLines != 1) { + SM.PrintMessage(Loc, SourceMgr::DK_Error, + CheckName + + ": is not on the line after the previous match"); + SM.PrintMessage(SMLoc::getFromPointer(Buffer.end()), SourceMgr::DK_Note, + "'next' match was here"); + SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note, + "previous match ended here"); + SM.PrintMessage(SMLoc::getFromPointer(FirstNewLine), SourceMgr::DK_Note, + "non-matching line after previous match is here"); + return true; + } + + return false; +} + +bool FileCheckString::CheckSame(const SourceMgr &SM, StringRef Buffer) const { + if (Pat.getCheckTy() != Check::CheckSame) + return false; + + // Count the number of newlines between the previous match and this one. 
+ const char *FirstNewLine = nullptr; + unsigned NumNewLines = CountNumNewlinesBetween(Buffer, FirstNewLine); + + if (NumNewLines != 0) { + SM.PrintMessage(Loc, SourceMgr::DK_Error, + Prefix + + "-SAME: is not on the same line as the previous match"); + SM.PrintMessage(SMLoc::getFromPointer(Buffer.end()), SourceMgr::DK_Note, + "'next' match was here"); + SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note, + "previous match ended here"); + return true; + } + + return false; +} + +bool FileCheckString::CheckNot( + const SourceMgr &SM, StringRef Buffer, + const std::vector &NotStrings, + const FileCheckRequest &Req, std::vector *Diags) const { + for (const FileCheckPattern *Pat : NotStrings) { + assert((Pat->getCheckTy() == Check::CheckNot) && "Expect CHECK-NOT!"); + + size_t MatchLen = 0; + Expected MatchResult = Pat->match(Buffer, MatchLen, SM); + + if (!MatchResult) { + PrintNoMatch(false, SM, Prefix, Pat->getLoc(), *Pat, 1, Buffer, + Req.VerboseVerbose, Diags, MatchResult.takeError()); + continue; + } + size_t Pos = *MatchResult; + + PrintMatch(false, SM, Prefix, Pat->getLoc(), *Pat, 1, Buffer, Pos, MatchLen, + Req, Diags); + + return true; + } + + return false; +} + +size_t +FileCheckString::CheckDag(const SourceMgr &SM, StringRef Buffer, + std::vector &NotStrings, + const FileCheckRequest &Req, + std::vector *Diags) const { + if (DagNotStrings.empty()) + return 0; + + // The start of the search range. + size_t StartPos = 0; + + struct MatchRange { + size_t Pos; + size_t End; + }; + // A sorted list of ranges for non-overlapping CHECK-DAG matches. Match + // ranges are erased from this list once they are no longer in the search + // range. + std::list MatchRanges; + + // We need PatItr and PatEnd later for detecting the end of a CHECK-DAG + // group, so we don't use a range-based for loop here. 
+ for (auto PatItr = DagNotStrings.begin(), PatEnd = DagNotStrings.end(); + PatItr != PatEnd; ++PatItr) { + const FileCheckPattern &Pat = *PatItr; + assert((Pat.getCheckTy() == Check::CheckDAG || + Pat.getCheckTy() == Check::CheckNot) && + "Invalid CHECK-DAG or CHECK-NOT!"); + + if (Pat.getCheckTy() == Check::CheckNot) { + NotStrings.push_back(&Pat); + continue; + } + + assert((Pat.getCheckTy() == Check::CheckDAG) && "Expect CHECK-DAG!"); + + // CHECK-DAG always matches from the start. + size_t MatchLen = 0, MatchPos = StartPos; + + // Search for a match that doesn't overlap a previous match in this + // CHECK-DAG group. + for (auto MI = MatchRanges.begin(), ME = MatchRanges.end(); true; ++MI) { + StringRef MatchBuffer = Buffer.substr(MatchPos); + Expected MatchResult = Pat.match(MatchBuffer, MatchLen, SM); + // With a group of CHECK-DAGs, a single mismatching means the match on + // that group of CHECK-DAGs fails immediately. + if (!MatchResult) { + PrintNoMatch(true, SM, Prefix, Pat.getLoc(), Pat, 1, MatchBuffer, + Req.VerboseVerbose, Diags, MatchResult.takeError()); + return StringRef::npos; + } + size_t MatchPosBuf = *MatchResult; + // Re-calc it as the offset relative to the start of the original string. + MatchPos += MatchPosBuf; + if (Req.VerboseVerbose) + PrintMatch(true, SM, Prefix, Pat.getLoc(), Pat, 1, Buffer, MatchPos, + MatchLen, Req, Diags); + MatchRange M{MatchPos, MatchPos + MatchLen}; + if (Req.AllowDeprecatedDagOverlap) { + // We don't need to track all matches in this mode, so we just maintain + // one match range that encompasses the current CHECK-DAG group's + // matches. + if (MatchRanges.empty()) + MatchRanges.insert(MatchRanges.end(), M); + else { + auto Block = MatchRanges.begin(); + Block->Pos = std::min(Block->Pos, M.Pos); + Block->End = std::max(Block->End, M.End); + } + break; + } + // Iterate previous matches until overlapping match or insertion point. 
+ bool Overlap = false; + for (; MI != ME; ++MI) { + if (M.Pos < MI->End) { + // !Overlap => New match has no overlap and is before this old match. + // Overlap => New match overlaps this old match. + Overlap = MI->Pos < M.End; + break; + } + } + if (!Overlap) { + // Insert non-overlapping match into list. + MatchRanges.insert(MI, M); + break; + } + if (Req.VerboseVerbose) { + // Due to their verbosity, we don't print verbose diagnostics here if + // we're gathering them for a different rendering, but we always print + // other diagnostics. + if (!Diags) { + SMLoc OldStart = SMLoc::getFromPointer(Buffer.data() + MI->Pos); + SMLoc OldEnd = SMLoc::getFromPointer(Buffer.data() + MI->End); + SMRange OldRange(OldStart, OldEnd); + SM.PrintMessage(OldStart, SourceMgr::DK_Note, + "match discarded, overlaps earlier DAG match here", + {OldRange}); + } else + Diags->rbegin()->MatchTy = FileCheckDiag::MatchFoundButDiscarded; + } + MatchPos = MI->End; + } + if (!Req.VerboseVerbose) + PrintMatch(true, SM, Prefix, Pat.getLoc(), Pat, 1, Buffer, MatchPos, + MatchLen, Req, Diags); + + // Handle the end of a CHECK-DAG group. + if (std::next(PatItr) == PatEnd || + std::next(PatItr)->getCheckTy() == Check::CheckNot) { + if (!NotStrings.empty()) { + // If there are CHECK-NOTs between two CHECK-DAGs or from CHECK to + // CHECK-DAG, verify that there are no 'not' strings occurred in that + // region. + StringRef SkippedRegion = + Buffer.slice(StartPos, MatchRanges.begin()->Pos); + if (CheckNot(SM, SkippedRegion, NotStrings, Req, Diags)) + return StringRef::npos; + // Clear "not strings". + NotStrings.clear(); + } + // All subsequent CHECK-DAGs and CHECK-NOTs should be matched from the + // end of this CHECK-DAG group's match range. + StartPos = MatchRanges.rbegin()->End; + // Don't waste time checking for (impossible) overlaps before that. + MatchRanges.clear(); + } + } + + return StartPos; +} + +// A check prefix must contain only alphanumeric, hyphens and underscores. 
+static bool ValidateCheckPrefix(StringRef CheckPrefix) { + static const Regex Validator("^[a-zA-Z0-9_-]*$"); + return Validator.match(CheckPrefix); +} + +bool FileCheck::ValidateCheckPrefixes() { + StringSet<> PrefixSet; + + for (StringRef Prefix : Req.CheckPrefixes) { + // Reject empty prefixes. + if (Prefix == "") + return false; + + if (!PrefixSet.insert(Prefix).second) + return false; + + if (!ValidateCheckPrefix(Prefix)) + return false; + } + + return true; +} + +Regex FileCheck::buildCheckPrefixRegex() { + // I don't think there's a way to specify an initial value for cl::list, + // so if nothing was specified, add the default + if (Req.CheckPrefixes.empty()) + Req.CheckPrefixes.push_back("CHECK"); + + // We already validated the contents of CheckPrefixes so just concatenate + // them as alternatives. + SmallString<32> PrefixRegexStr; + for (StringRef Prefix : Req.CheckPrefixes) { + if (Prefix != Req.CheckPrefixes.front()) + PrefixRegexStr.push_back('|'); + + PrefixRegexStr.append(Prefix); + } + + return Regex(PrefixRegexStr); +} + +Error FileCheckPatternContext::defineCmdlineVariables( + std::vector &CmdlineDefines, SourceMgr &SM) { + assert(GlobalVariableTable.empty() && GlobalNumericVariableTable.empty() && + "Overriding defined variable with command-line variable definitions"); + + if (CmdlineDefines.empty()) + return Error::success(); + + // Create a string representing the vector of command-line definitions. Each + // definition is on its own line and prefixed with a definition number to + // clarify which definition a given diagnostic corresponds to. 
+ unsigned I = 0; + Error Errs = Error::success(); + std::string CmdlineDefsDiag; + SmallVector, 4> CmdlineDefsIndices; + for (StringRef CmdlineDef : CmdlineDefines) { + std::string DefPrefix = ("Global define #" + Twine(++I) + ": ").str(); + size_t EqIdx = CmdlineDef.find('='); + if (EqIdx == StringRef::npos) { + CmdlineDefsIndices.push_back(std::make_pair(CmdlineDefsDiag.size(), 0)); + continue; + } + // Numeric variable definition. + if (CmdlineDef[0] == '#') { + // Append a copy of the command-line definition adapted to use the same + // format as in the input file to be able to reuse + // parseNumericSubstitutionBlock. + CmdlineDefsDiag += (DefPrefix + CmdlineDef + " (parsed as: [[").str(); + std::string SubstitutionStr = CmdlineDef; + SubstitutionStr[EqIdx] = ':'; + CmdlineDefsIndices.push_back( + std::make_pair(CmdlineDefsDiag.size(), SubstitutionStr.size())); + CmdlineDefsDiag += (SubstitutionStr + Twine("]])\n")).str(); + } else { + CmdlineDefsDiag += DefPrefix; + CmdlineDefsIndices.push_back( + std::make_pair(CmdlineDefsDiag.size(), CmdlineDef.size())); + CmdlineDefsDiag += (CmdlineDef + "\n").str(); + } + } + + // Create a buffer with fake command line content in order to display + // parsing diagnostic with location information and point to the + // global definition with invalid syntax. + std::unique_ptr CmdLineDefsDiagBuffer = + MemoryBuffer::getMemBufferCopy(CmdlineDefsDiag, "Global defines"); + StringRef CmdlineDefsDiagRef = CmdLineDefsDiagBuffer->getBuffer(); + SM.AddNewSourceBuffer(std::move(CmdLineDefsDiagBuffer), SMLoc()); + + for (std::pair CmdlineDefIndices : CmdlineDefsIndices) { + StringRef CmdlineDef = CmdlineDefsDiagRef.substr(CmdlineDefIndices.first, + CmdlineDefIndices.second); + if (CmdlineDef.empty()) { + Errs = joinErrors( + std::move(Errs), + FileCheckErrorDiagnostic::get( + SM, CmdlineDef, "missing equal sign in global definition")); + continue; + } + + // Numeric variable definition. 
+ if (CmdlineDef[0] == '#') { + // Now parse the definition both to check that the syntax is correct and + // to create the necessary class instance. + StringRef CmdlineDefExpr = CmdlineDef.substr(1); + Optional DefinedNumericVariable; + Expected> ExpressionASTResult = + FileCheckPattern::parseNumericSubstitutionBlock( + CmdlineDefExpr, DefinedNumericVariable, false, None, this, SM); + if (!ExpressionASTResult) { + Errs = joinErrors(std::move(Errs), ExpressionASTResult.takeError()); + continue; + } + std::unique_ptr ExpressionAST = + std::move(*ExpressionASTResult); + // Now evaluate the expression whose value this variable should be set + // to, since the expression of a command-line variable definition should + // only use variables defined earlier on the command-line. If not, this + // is an error and we report it. + Expected Value = ExpressionAST->eval(); + if (!Value) { + Errs = joinErrors(std::move(Errs), Value.takeError()); + continue; + } + + assert(DefinedNumericVariable && "No variable defined"); + (*DefinedNumericVariable)->setValue(*Value); + + // Record this variable definition. + GlobalNumericVariableTable[(*DefinedNumericVariable)->getName()] = + *DefinedNumericVariable; + } else { + // String variable definition. + std::pair CmdlineNameVal = CmdlineDef.split('='); + StringRef CmdlineName = CmdlineNameVal.first; + StringRef OrigCmdlineName = CmdlineName; + Expected ParseVarResult = + FileCheckPattern::parseVariable(CmdlineName, SM); + if (!ParseVarResult) { + Errs = joinErrors(std::move(Errs), ParseVarResult.takeError()); + continue; + } + // Check that CmdlineName does not denote a pseudo variable is only + // composed of the parsed numeric variable. This catches cases like + // "FOO+2" in a "FOO+2=10" definition. 
+ if (ParseVarResult->IsPseudo || !CmdlineName.empty()) { + Errs = joinErrors(std::move(Errs), + FileCheckErrorDiagnostic::get( + SM, OrigCmdlineName, + "invalid name in string variable definition '" + + OrigCmdlineName + "'")); + continue; + } + StringRef Name = ParseVarResult->Name; + + // Detect collisions between string and numeric variables when the former + // is created later than the latter. + if (GlobalNumericVariableTable.find(Name) != + GlobalNumericVariableTable.end()) { + Errs = joinErrors(std::move(Errs), FileCheckErrorDiagnostic::get( + SM, Name, + "numeric variable with name '" + + Name + "' already exists")); + continue; + } + GlobalVariableTable.insert(CmdlineNameVal); + // Mark the string variable as defined to detect collisions between + // string and numeric variables in defineCmdlineVariables when the latter + // is created later than the former. We cannot reuse GlobalVariableTable + // for this by populating it with an empty string since we would then + // lose the ability to detect the use of an undefined variable in + // match(). + DefinedVariableTable[Name] = true; + } + } + + return Errs; +} + +void FileCheckPatternContext::clearLocalVars() { + SmallVector LocalPatternVars, LocalNumericVars; + for (const StringMapEntry &Var : GlobalVariableTable) + if (Var.first()[0] != '$') + LocalPatternVars.push_back(Var.first()); + + // Numeric substitution reads the value of a variable directly, not via + // GlobalNumericVariableTable. Therefore, we clear local variables by + // clearing their value which will lead to a numeric substitution failure. We + // also mark the variable for removal from GlobalNumericVariableTable since + // this is what defineCmdlineVariables checks to decide that no global + // variable has been defined. 
+ for (const auto &Var : GlobalNumericVariableTable) + if (Var.first()[0] != '$') { + Var.getValue()->clearValue(); + LocalNumericVars.push_back(Var.first()); + } + + for (const auto &Var : LocalPatternVars) + GlobalVariableTable.erase(Var); + for (const auto &Var : LocalNumericVars) + GlobalNumericVariableTable.erase(Var); +} + +bool FileCheck::checkInput(SourceMgr &SM, StringRef Buffer, + std::vector *Diags) { + bool ChecksFailed = false; + + unsigned i = 0, j = 0, e = CheckStrings->size(); + while (true) { + StringRef CheckRegion; + if (j == e) { + CheckRegion = Buffer; + } else { + const FileCheckString &CheckLabelStr = (*CheckStrings)[j]; + if (CheckLabelStr.Pat.getCheckTy() != Check::CheckLabel) { + ++j; + continue; + } + + // Scan to next CHECK-LABEL match, ignoring CHECK-NOT and CHECK-DAG + size_t MatchLabelLen = 0; + size_t MatchLabelPos = + CheckLabelStr.Check(SM, Buffer, true, MatchLabelLen, Req, Diags); + if (MatchLabelPos == StringRef::npos) + // Immediately bail if CHECK-LABEL fails, nothing else we can do. + return false; + + CheckRegion = Buffer.substr(0, MatchLabelPos + MatchLabelLen); + Buffer = Buffer.substr(MatchLabelPos + MatchLabelLen); + ++j; + } + + // Do not clear the first region as it's the one before the first + // CHECK-LABEL and it would clear variables defined on the command-line + // before they get used. + if (i != 0 && Req.EnableVarScope) + PatternContext->clearLocalVars(); + + for (; i != j; ++i) { + const FileCheckString &CheckStr = (*CheckStrings)[i]; + + // Check each string within the scanned region, including a second check + // of any final CHECK-LABEL (to verify CHECK-NOT and CHECK-DAG) + size_t MatchLen = 0; + size_t MatchPos = + CheckStr.Check(SM, CheckRegion, false, MatchLen, Req, Diags); + + if (MatchPos == StringRef::npos) { + ChecksFailed = true; + i = j; + break; + } + + CheckRegion = CheckRegion.substr(MatchPos + MatchLen); + } + + if (j == e) + break; + } + + // Success if no checks failed. 
+ return !ChecksFailed; +} diff --git a/lib/Support/FileCheckImpl.h b/lib/Support/FileCheckImpl.h index 6581612a8e4..001b3589d5f 100644 --- a/lib/Support/FileCheckImpl.h +++ b/lib/Support/FileCheckImpl.h @@ -1,624 +1,621 @@ -//===-- FileCheckImpl.h - Private FileCheck Interface ------------*- C++ -*-==// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file defines the private interfaces of FileCheck. Its purpose is to -// allow unit testing of FileCheck and to separate the interface from the -// implementation. It is only meant to be used by FileCheck. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_LIB_SUPPORT_FILECHECKIMPL_H -#define LLVM_LIB_SUPPORT_FILECHECKIMPL_H - -#include "llvm/ADT/Optional.h" -#include "llvm/ADT/StringMap.h" -#include "llvm/ADT/StringRef.h" -#include "llvm/Support/Error.h" -#include "llvm/Support/SourceMgr.h" -#include -#include -#include - -namespace llvm { - -//===----------------------------------------------------------------------===// -// Numeric substitution handling code. -//===----------------------------------------------------------------------===// - -/// Base class representing the AST of a given expression. -class FileCheckExpressionAST { -public: - virtual ~FileCheckExpressionAST() = default; - - /// Evaluates and \returns the value of the expression represented by this - /// AST or an error if evaluation fails. - virtual Expected eval() const = 0; -}; - -/// Class representing an unsigned literal in the AST of an expression. -class FileCheckExpressionLiteral : public FileCheckExpressionAST { -private: - /// Actual value of the literal. - uint64_t Value; - -public: - /// Constructs a literal with the specified value. 
- FileCheckExpressionLiteral(uint64_t Val) : Value(Val) {} - - /// \returns the literal's value. - Expected eval() const { return Value; } -}; - -/// Class to represent an undefined variable error, which quotes that -/// variable's name when printed. -class FileCheckUndefVarError : public ErrorInfo { -private: - StringRef VarName; - -public: - static char ID; - - FileCheckUndefVarError(StringRef VarName) : VarName(VarName) {} - - StringRef getVarName() const { return VarName; } - - std::error_code convertToErrorCode() const override { - return inconvertibleErrorCode(); - } - - /// Print name of variable associated with this error. - void log(raw_ostream &OS) const override { - OS << "\""; - OS.write_escaped(VarName) << "\""; - } -}; - -/// Class representing a numeric variable and its associated current value. -class FileCheckNumericVariable { -private: - /// Name of the numeric variable. - StringRef Name; - - /// Value of numeric variable, if defined, or None otherwise. - Optional Value; - - /// Line number where this variable is defined, or None if defined before - /// input is parsed. Used to determine whether a variable is defined on the - /// same line as a given use. - Optional DefLineNumber; - -public: - /// Constructor for a variable \p Name defined at line \p DefLineNumber or - /// defined before input is parsed if \p DefLineNumber is None. - explicit FileCheckNumericVariable(StringRef Name, - Optional DefLineNumber = None) - : Name(Name), DefLineNumber(DefLineNumber) {} - - /// \returns name of this numeric variable. - StringRef getName() const { return Name; } - - /// \returns this variable's value. - Optional getValue() const { return Value; } - - /// Sets value of this numeric variable to \p NewValue. - void setValue(uint64_t NewValue) { Value = NewValue; } - - /// Clears value of this numeric variable, regardless of whether it is - /// currently defined or not. 
- void clearValue() { Value = None; } - - /// \returns the line number where this variable is defined, if any, or None - /// if defined before input is parsed. - Optional getDefLineNumber() { return DefLineNumber; } -}; - -/// Class representing the use of a numeric variable in the AST of an -/// expression. -class FileCheckNumericVariableUse : public FileCheckExpressionAST { -private: - /// Name of the numeric variable. - StringRef Name; - - /// Pointer to the class instance for the variable this use is about. - FileCheckNumericVariable *NumericVariable; - -public: - FileCheckNumericVariableUse(StringRef Name, - FileCheckNumericVariable *NumericVariable) - : Name(Name), NumericVariable(NumericVariable) {} - - /// \returns the value of the variable referenced by this instance. - Expected eval() const; -}; - -/// Type of functions evaluating a given binary operation. -using binop_eval_t = uint64_t (*)(uint64_t, uint64_t); - -/// Class representing a single binary operation in the AST of an expression. -class FileCheckASTBinop : public FileCheckExpressionAST { -private: - /// Left operand. - std::unique_ptr LeftOperand; - - /// Right operand. - std::unique_ptr RightOperand; - - /// Pointer to function that can evaluate this binary operation. - binop_eval_t EvalBinop; - -public: - FileCheckASTBinop(binop_eval_t EvalBinop, - std::unique_ptr LeftOp, - std::unique_ptr RightOp) - : EvalBinop(EvalBinop) { - LeftOperand = std::move(LeftOp); - RightOperand = std::move(RightOp); - } - - /// Evaluates the value of the binary operation represented by this AST, - /// using EvalBinop on the result of recursively evaluating the operands. - /// \returns the expression value or an error if an undefined numeric - /// variable is used in one of the operands. - Expected eval() const; -}; - -class FileCheckPatternContext; - -/// Class representing a substitution to perform in the RegExStr string. 
-class FileCheckSubstitution { -protected: - /// Pointer to a class instance holding, among other things, the table with - /// the values of live string variables at the start of any given CHECK line. - /// Used for substituting string variables with the text they were defined - /// as. Expressions are linked to the numeric variables they use at - /// parse time and directly access the value of the numeric variable to - /// evaluate their value. - FileCheckPatternContext *Context; - - /// The string that needs to be substituted for something else. For a - /// string variable this is its name, otherwise this is the whole expression. - StringRef FromStr; - - // Index in RegExStr of where to do the substitution. - size_t InsertIdx; - -public: - FileCheckSubstitution(FileCheckPatternContext *Context, StringRef VarName, - size_t InsertIdx) - : Context(Context), FromStr(VarName), InsertIdx(InsertIdx) {} - - virtual ~FileCheckSubstitution() = default; - - /// \returns the string to be substituted for something else. - StringRef getFromString() const { return FromStr; } - - /// \returns the index where the substitution is to be performed in RegExStr. - size_t getIndex() const { return InsertIdx; } - - /// \returns a string containing the result of the substitution represented - /// by this class instance or an error if substitution failed. - virtual Expected getResult() const = 0; -}; - -class FileCheckStringSubstitution : public FileCheckSubstitution { -public: - FileCheckStringSubstitution(FileCheckPatternContext *Context, - StringRef VarName, size_t InsertIdx) - : FileCheckSubstitution(Context, VarName, InsertIdx) {} - - /// \returns the text that the string variable in this substitution matched - /// when defined, or an error if the variable is undefined. 
- Expected getResult() const override; -}; - -class FileCheckNumericSubstitution : public FileCheckSubstitution { -private: - /// Pointer to the class representing the expression whose value is to be - /// substituted. - std::unique_ptr ExpressionAST; - -public: - FileCheckNumericSubstitution(FileCheckPatternContext *Context, StringRef Expr, - std::unique_ptr ExprAST, - size_t InsertIdx) - : FileCheckSubstitution(Context, Expr, InsertIdx) { - ExpressionAST = std::move(ExprAST); - } - - /// \returns a string containing the result of evaluating the expression in - /// this substitution, or an error if evaluation failed. - Expected getResult() const override; -}; - -//===----------------------------------------------------------------------===// -// Pattern handling code. -//===----------------------------------------------------------------------===// - -struct FileCheckDiag; - -/// Class holding the FileCheckPattern global state, shared by all patterns: -/// tables holding values of variables and whether they are defined or not at -/// any given time in the matching process. -class FileCheckPatternContext { - friend class FileCheckPattern; - -private: - /// When matching a given pattern, this holds the value of all the string - /// variables defined in previous patterns. In a pattern, only the last - /// definition for a given variable is recorded in this table. - /// Back-references are used for uses after any the other definition. - StringMap GlobalVariableTable; - - /// Map of all string variables defined so far. Used at parse time to detect - /// a name conflict between a numeric variable and a string variable when - /// the former is defined on a later line than the latter. - StringMap DefinedVariableTable; - - /// When matching a given pattern, this holds the pointers to the classes - /// representing the numeric variables defined in previous patterns. 
When - /// matching a pattern all definitions for that pattern are recorded in the - /// NumericVariableDefs table in the FileCheckPattern instance of that - /// pattern. - StringMap GlobalNumericVariableTable; - - /// Pointer to the class instance representing the @LINE pseudo variable for - /// easily updating its value. - FileCheckNumericVariable *LineVariable = nullptr; - - /// Vector holding pointers to all parsed numeric variables. Used to - /// automatically free them once they are guaranteed to no longer be used. - std::vector> NumericVariables; - - /// Vector holding pointers to all substitutions. Used to automatically free - /// them once they are guaranteed to no longer be used. - std::vector> Substitutions; - -public: - /// \returns the value of string variable \p VarName or an error if no such - /// variable has been defined. - Expected getPatternVarValue(StringRef VarName); - - /// Defines string and numeric variables from definitions given on the - /// command line, passed as a vector of [#]VAR=VAL strings in - /// \p CmdlineDefines. \returns an error list containing diagnostics against - /// \p SM for all definition parsing failures, if any, or Success otherwise. - Error defineCmdlineVariables(std::vector &CmdlineDefines, - SourceMgr &SM); - - /// Create @LINE pseudo variable. Value is set when pattern are being - /// matched. - void createLineVariable(); - - /// Undefines local variables (variables whose name does not start with a '$' - /// sign), i.e. removes them from GlobalVariableTable and from - /// GlobalNumericVariableTable and also clears the value of numeric - /// variables. - void clearLocalVars(); - -private: - /// Makes a new numeric variable and registers it for destruction when the - /// context is destroyed. - template - FileCheckNumericVariable *makeNumericVariable(Types... args); - - /// Makes a new string substitution and registers it for destruction when the - /// context is destroyed. 
- FileCheckSubstitution *makeStringSubstitution(StringRef VarName, - size_t InsertIdx); - - /// Makes a new numeric substitution and registers it for destruction when - /// the context is destroyed. - FileCheckSubstitution * - makeNumericSubstitution(StringRef ExpressionStr, - std::unique_ptr ExpressionAST, - size_t InsertIdx); -}; - -/// Class to represent an error holding a diagnostic with location information -/// used when printing it. -class FileCheckErrorDiagnostic : public ErrorInfo { -private: - SMDiagnostic Diagnostic; - -public: - static char ID; - - FileCheckErrorDiagnostic(SMDiagnostic &&Diag) : Diagnostic(Diag) {} - - std::error_code convertToErrorCode() const override { - return inconvertibleErrorCode(); - } - - /// Print diagnostic associated with this error when printing the error. - void log(raw_ostream &OS) const override { Diagnostic.print(nullptr, OS); } - - static Error get(const SourceMgr &SM, SMLoc Loc, const Twine &ErrMsg) { - return make_error( - SM.GetMessage(Loc, SourceMgr::DK_Error, ErrMsg)); - } - - static Error get(const SourceMgr &SM, StringRef Buffer, const Twine &ErrMsg) { - return get(SM, SMLoc::getFromPointer(Buffer.data()), ErrMsg); - } -}; - -class FileCheckNotFoundError : public ErrorInfo { -public: - static char ID; - - std::error_code convertToErrorCode() const override { - return inconvertibleErrorCode(); - } - - /// Print diagnostic associated with this error when printing the error. - void log(raw_ostream &OS) const override { - OS << "String not found in input"; - } -}; - -class FileCheckPattern { - SMLoc PatternLoc; - - /// A fixed string to match as the pattern or empty if this pattern requires - /// a regex match. - StringRef FixedStr; - - /// A regex string to match as the pattern or empty if this pattern requires - /// a fixed string to match. - std::string RegExStr; - - /// Entries in this vector represent a substitution of a string variable or - /// an expression in the RegExStr regex at match time. 
For example, in the - /// case of a CHECK directive with the pattern "foo[[bar]]baz[[#N+1]]", - /// RegExStr will contain "foobaz" and we'll get two entries in this vector - /// that tells us to insert the value of string variable "bar" at offset 3 - /// and the value of expression "N+1" at offset 6. - std::vector Substitutions; - - /// Maps names of string variables defined in a pattern to the number of - /// their parenthesis group in RegExStr capturing their last definition. - /// - /// E.g. for the pattern "foo[[bar:.*]]baz([[bar]][[QUUX]][[bar:.*]])", - /// RegExStr will be "foo(.*)baz(\1(.*))" where is - /// the value captured for QUUX on the earlier line where it was defined, and - /// VariableDefs will map "bar" to the third parenthesis group which captures - /// the second definition of "bar". - /// - /// Note: uses std::map rather than StringMap to be able to get the key when - /// iterating over values. - std::map VariableDefs; - - /// Structure representing the definition of a numeric variable in a pattern. - /// It holds the pointer to the class representing the numeric variable whose - /// value is being defined and the number of the parenthesis group in - /// RegExStr to capture that value. - struct FileCheckNumericVariableMatch { - /// Pointer to class representing the numeric variable whose value is being - /// defined. - FileCheckNumericVariable *DefinedNumericVariable; - - /// Number of the parenthesis group in RegExStr that captures the value of - /// this numeric variable definition. - unsigned CaptureParenGroup; - }; - - /// Holds the number of the parenthesis group in RegExStr and pointer to the - /// corresponding FileCheckNumericVariable class instance of all numeric - /// variable definitions. Used to set the matched value of all those - /// variables. 
- StringMap NumericVariableDefs; - - /// Pointer to a class instance holding the global state shared by all - /// patterns: - /// - separate tables with the values of live string and numeric variables - /// respectively at the start of any given CHECK line; - /// - table holding whether a string variable has been defined at any given - /// point during the parsing phase. - FileCheckPatternContext *Context; - - Check::FileCheckType CheckTy; - - /// Line number for this CHECK pattern or None if it is an implicit pattern. - /// Used to determine whether a variable definition is made on an earlier - /// line to the one with this CHECK. - Optional LineNumber; - - /// Ignore case while matching if set to true. - bool IgnoreCase = false; - -public: - FileCheckPattern(Check::FileCheckType Ty, FileCheckPatternContext *Context, - Optional Line = None) - : Context(Context), CheckTy(Ty), LineNumber(Line) {} - - /// \returns the location in source code. - SMLoc getLoc() const { return PatternLoc; } - - /// \returns the pointer to the global state for all patterns in this - /// FileCheck instance. - FileCheckPatternContext *getContext() const { return Context; } - - /// \returns whether \p C is a valid first character for a variable name. - static bool isValidVarNameStart(char C); - - /// Parsing information about a variable. - struct VariableProperties { - StringRef Name; - bool IsPseudo; - }; - - /// Parses the string at the start of \p Str for a variable name. \returns - /// a VariableProperties structure holding the variable name and whether it - /// is the name of a pseudo variable, or an error holding a diagnostic - /// against \p SM if parsing fail. If parsing was successful, also strips - /// \p Str from the variable name. - static Expected parseVariable(StringRef &Str, - const SourceMgr &SM); - /// Parses \p Expr for a numeric substitution block at line \p LineNumber, - /// or before input is parsed if \p LineNumber is None. 
Parameter - /// \p IsLegacyLineExpr indicates whether \p Expr should be a legacy @LINE - /// expression and \p Context points to the class instance holding the live - /// string and numeric variables. \returns a pointer to the class instance - /// representing the AST of the expression whose value must be substitued, or - /// an error holding a diagnostic against \p SM if parsing fails. If - /// substitution was successful, sets \p DefinedNumericVariable to point to - /// the class representing the numeric variable defined in this numeric - /// substitution block, or None if this block does not define any variable. - static Expected> - parseNumericSubstitutionBlock( - StringRef Expr, - Optional &DefinedNumericVariable, - bool IsLegacyLineExpr, Optional LineNumber, - FileCheckPatternContext *Context, const SourceMgr &SM); - /// Parses the pattern in \p PatternStr and initializes this FileCheckPattern - /// instance accordingly. - /// - /// \p Prefix provides which prefix is being matched, \p Req describes the - /// global options that influence the parsing such as whitespace - /// canonicalization, \p SM provides the SourceMgr used for error reports. - /// \returns true in case of an error, false otherwise. - bool parsePattern(StringRef PatternStr, StringRef Prefix, SourceMgr &SM, - const FileCheckRequest &Req); - /// Matches the pattern string against the input buffer \p Buffer - /// - /// \returns the position that is matched or an error indicating why matching - /// failed. If there is a match, updates \p MatchLen with the size of the - /// matched string. - /// - /// The GlobalVariableTable StringMap in the FileCheckPatternContext class - /// instance provides the current values of FileCheck string variables and - /// is updated if this match defines new values. Likewise, the - /// GlobalNumericVariableTable StringMap in the same class provides the - /// current values of FileCheck numeric variables and is updated if this - /// match defines new numeric values. 
- Expected match(StringRef Buffer, size_t &MatchLen, - const SourceMgr &SM) const; - /// Prints the value of successful substitutions or the name of the undefined - /// string or numeric variables preventing a successful substitution. - void printSubstitutions(const SourceMgr &SM, StringRef Buffer, - SMRange MatchRange = None) const; - void printFuzzyMatch(const SourceMgr &SM, StringRef Buffer, - std::vector *Diags) const; - - bool hasVariable() const { - return !(Substitutions.empty() && VariableDefs.empty()); - } - - Check::FileCheckType getCheckTy() const { return CheckTy; } - - int getCount() const { return CheckTy.getCount(); } - -private: - bool AddRegExToRegEx(StringRef RS, unsigned &CurParen, SourceMgr &SM); - void AddBackrefToRegEx(unsigned BackrefNum); - /// Computes an arbitrary estimate for the quality of matching this pattern - /// at the start of \p Buffer; a distance of zero should correspond to a - /// perfect match. - unsigned computeMatchDistance(StringRef Buffer) const; - /// Finds the closing sequence of a regex variable usage or definition. - /// - /// \p Str has to point in the beginning of the definition (right after the - /// opening sequence). \p SM holds the SourceMgr used for error repporting. - /// \returns the offset of the closing sequence within Str, or npos if it - /// was not found. - size_t FindRegexVarEnd(StringRef Str, SourceMgr &SM); - - /// Parses \p Expr for the name of a numeric variable to be defined at line - /// \p LineNumber, or before input is parsed if \p LineNumber is None. - /// \returns a pointer to the class instance representing that variable, - /// creating it if needed, or an error holding a diagnostic against \p SM - /// should defining such a variable be invalid. 
- static Expected parseNumericVariableDefinition( - StringRef &Expr, FileCheckPatternContext *Context, - Optional LineNumber, const SourceMgr &SM); - /// Parses \p Name as a (pseudo if \p IsPseudo is true) numeric variable use - /// at line \p LineNumber, or before input is parsed if \p LineNumber is - /// None. Parameter \p Context points to the class instance holding the live - /// string and numeric variables. \returns the pointer to the class instance - /// representing that variable if successful, or an error holding a - /// diagnostic against \p SM otherwise. - static Expected> - parseNumericVariableUse(StringRef Name, bool IsPseudo, - Optional LineNumber, - FileCheckPatternContext *Context, - const SourceMgr &SM); - enum class AllowedOperand { LineVar, Literal, Any }; - /// Parses \p Expr for use of a numeric operand at line \p LineNumber, or - /// before input is parsed if \p LineNumber is None. Accepts both literal - /// values and numeric variables, depending on the value of \p AO. Parameter - /// \p Context points to the class instance holding the live string and - /// numeric variables. \returns the class representing that operand in the - /// AST of the expression or an error holding a diagnostic against \p SM - /// otherwise. - static Expected> - parseNumericOperand(StringRef &Expr, AllowedOperand AO, - Optional LineNumber, - FileCheckPatternContext *Context, const SourceMgr &SM); - /// Parses \p Expr for a binary operation at line \p LineNumber, or before - /// input is parsed if \p LineNumber is None. The left operand of this binary - /// operation is given in \p LeftOp and \p IsLegacyLineExpr indicates whether - /// we are parsing a legacy @LINE expression. Parameter \p Context points to - /// the class instance holding the live string and numeric variables. - /// \returns the class representing the binary operation in the AST of the - /// expression, or an error holding a diagnostic against \p SM otherwise. 
- static Expected> - parseBinop(StringRef &Expr, std::unique_ptr LeftOp, - bool IsLegacyLineExpr, Optional LineNumber, - FileCheckPatternContext *Context, const SourceMgr &SM); -}; - -//===----------------------------------------------------------------------===// -// Check Strings. -//===----------------------------------------------------------------------===// - -/// A check that we found in the input file. -struct FileCheckString { - /// The pattern to match. - FileCheckPattern Pat; - - /// Which prefix name this check matched. - StringRef Prefix; - - /// The location in the match file that the check string was specified. - SMLoc Loc; - - /// All of the strings that are disallowed from occurring between this match - /// string and the previous one (or start of file). - std::vector DagNotStrings; - - FileCheckString(const FileCheckPattern &P, StringRef S, SMLoc L) - : Pat(P), Prefix(S), Loc(L) {} - - /// Matches check string and its "not strings" and/or "dag strings". - size_t Check(const SourceMgr &SM, StringRef Buffer, bool IsLabelScanMode, - size_t &MatchLen, FileCheckRequest &Req, - std::vector *Diags) const; - - /// Verifies that there is a single line in the given \p Buffer. Errors are - /// reported against \p SM. - bool CheckNext(const SourceMgr &SM, StringRef Buffer) const; - /// Verifies that there is no newline in the given \p Buffer. Errors are - /// reported against \p SM. - bool CheckSame(const SourceMgr &SM, StringRef Buffer) const; - /// Verifies that none of the strings in \p NotStrings are found in the given - /// \p Buffer. Errors are reported against \p SM and diagnostics recorded in - /// \p Diags according to the verbosity level set in \p Req. - bool CheckNot(const SourceMgr &SM, StringRef Buffer, - const std::vector &NotStrings, - const FileCheckRequest &Req, - std::vector *Diags) const; - /// Matches "dag strings" and their mixed "not strings". 
- size_t CheckDag(const SourceMgr &SM, StringRef Buffer, - std::vector &NotStrings, - const FileCheckRequest &Req, - std::vector *Diags) const; -}; - -} // namespace llvm - -#endif +//===-- FileCheckImpl.h - Private FileCheck Interface ------------*- C++ -*-==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines the private interfaces of FileCheck. Its purpose is to +// allow unit testing of FileCheck and to separate the interface from the +// implementation. It is only meant to be used by FileCheck. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_SUPPORT_FILECHECKIMPL_H +#define LLVM_LIB_SUPPORT_FILECHECKIMPL_H + +#include "llvm/ADT/Optional.h" +#include "llvm/ADT/StringMap.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/SourceMgr.h" +#include +#include +#include + +namespace llvm { + +//===----------------------------------------------------------------------===// +// Numeric substitution handling code. +//===----------------------------------------------------------------------===// + +/// Base class representing the AST of a given expression. +class FileCheckExpressionAST { +public: + virtual ~FileCheckExpressionAST() = default; + + /// Evaluates and \returns the value of the expression represented by this + /// AST or an error if evaluation fails. + virtual Expected eval() const = 0; +}; + +/// Class representing an unsigned literal in the AST of an expression. +class FileCheckExpressionLiteral : public FileCheckExpressionAST { +private: + /// Actual value of the literal. + uint64_t Value; + +public: + /// Constructs a literal with the specified value. 
+ FileCheckExpressionLiteral(uint64_t Val) : Value(Val) {} + + /// \returns the literal's value. + Expected eval() const { return Value; } +}; + +/// Class to represent an undefined variable error, which quotes that +/// variable's name when printed. +class FileCheckUndefVarError : public ErrorInfo { +private: + StringRef VarName; + +public: + static char ID; + + FileCheckUndefVarError(StringRef VarName) : VarName(VarName) {} + + StringRef getVarName() const { return VarName; } + + std::error_code convertToErrorCode() const override { + return inconvertibleErrorCode(); + } + + /// Print name of variable associated with this error. + void log(raw_ostream &OS) const override { + OS << "\""; + OS.write_escaped(VarName) << "\""; + } +}; + +/// Class representing a numeric variable and its associated current value. +class FileCheckNumericVariable { +private: + /// Name of the numeric variable. + StringRef Name; + + /// Value of numeric variable, if defined, or None otherwise. + Optional Value; + + /// Line number where this variable is defined, or None if defined before + /// input is parsed. Used to determine whether a variable is defined on the + /// same line as a given use. + Optional DefLineNumber; + +public: + /// Constructor for a variable \p Name defined at line \p DefLineNumber or + /// defined before input is parsed if \p DefLineNumber is None. + explicit FileCheckNumericVariable(StringRef Name, + Optional DefLineNumber = None) + : Name(Name), DefLineNumber(DefLineNumber) {} + + /// \returns name of this numeric variable. + StringRef getName() const { return Name; } + + /// \returns this variable's value. + Optional getValue() const { return Value; } + + /// Sets value of this numeric variable to \p NewValue. + void setValue(uint64_t NewValue) { Value = NewValue; } + + /// Clears value of this numeric variable, regardless of whether it is + /// currently defined or not. 
+ void clearValue() { Value = None; } + + /// \returns the line number where this variable is defined, if any, or None + /// if defined before input is parsed. + Optional getDefLineNumber() { return DefLineNumber; } +}; + +/// Class representing the use of a numeric variable in the AST of an +/// expression. +class FileCheckNumericVariableUse : public FileCheckExpressionAST { +private: + /// Name of the numeric variable. + StringRef Name; + + /// Pointer to the class instance for the variable this use is about. + FileCheckNumericVariable *NumericVariable; + +public: + FileCheckNumericVariableUse(StringRef Name, + FileCheckNumericVariable *NumericVariable) + : Name(Name), NumericVariable(NumericVariable) {} + + /// \returns the value of the variable referenced by this instance. + Expected eval() const; +}; + +/// Type of functions evaluating a given binary operation. +using binop_eval_t = uint64_t (*)(uint64_t, uint64_t); + +/// Class representing a single binary operation in the AST of an expression. +class FileCheckASTBinop : public FileCheckExpressionAST { +private: + /// Left operand. + std::unique_ptr LeftOperand; + + /// Right operand. + std::unique_ptr RightOperand; + + /// Pointer to function that can evaluate this binary operation. + binop_eval_t EvalBinop; + +public: + FileCheckASTBinop(binop_eval_t EvalBinop, + std::unique_ptr LeftOp, + std::unique_ptr RightOp) + : EvalBinop(EvalBinop) { + LeftOperand = std::move(LeftOp); + RightOperand = std::move(RightOp); + } + + /// Evaluates the value of the binary operation represented by this AST, + /// using EvalBinop on the result of recursively evaluating the operands. + /// \returns the expression value or an error if an undefined numeric + /// variable is used in one of the operands. + Expected eval() const; +}; + +class FileCheckPatternContext; + +/// Class representing a substitution to perform in the RegExStr string. 
+class FileCheckSubstitution { +protected: + /// Pointer to a class instance holding, among other things, the table with + /// the values of live string variables at the start of any given CHECK line. + /// Used for substituting string variables with the text they were defined + /// as. Expressions are linked to the numeric variables they use at + /// parse time and directly access the value of the numeric variable to + /// evaluate their value. + FileCheckPatternContext *Context; + + /// The string that needs to be substituted for something else. For a + /// string variable this is its name, otherwise this is the whole expression. + StringRef FromStr; + + // Index in RegExStr of where to do the substitution. + size_t InsertIdx; + +public: + FileCheckSubstitution(FileCheckPatternContext *Context, StringRef VarName, + size_t InsertIdx) + : Context(Context), FromStr(VarName), InsertIdx(InsertIdx) {} + + virtual ~FileCheckSubstitution() = default; + + /// \returns the string to be substituted for something else. + StringRef getFromString() const { return FromStr; } + + /// \returns the index where the substitution is to be performed in RegExStr. + size_t getIndex() const { return InsertIdx; } + + /// \returns a string containing the result of the substitution represented + /// by this class instance or an error if substitution failed. + virtual Expected getResult() const = 0; +}; + +class FileCheckStringSubstitution : public FileCheckSubstitution { +public: + FileCheckStringSubstitution(FileCheckPatternContext *Context, + StringRef VarName, size_t InsertIdx) + : FileCheckSubstitution(Context, VarName, InsertIdx) {} + + /// \returns the text that the string variable in this substitution matched + /// when defined, or an error if the variable is undefined. 
+ Expected getResult() const override; +}; + +class FileCheckNumericSubstitution : public FileCheckSubstitution { +private: + /// Pointer to the class representing the expression whose value is to be + /// substituted. + std::unique_ptr ExpressionAST; + +public: + FileCheckNumericSubstitution(FileCheckPatternContext *Context, StringRef Expr, + std::unique_ptr ExprAST, + size_t InsertIdx) + : FileCheckSubstitution(Context, Expr, InsertIdx) { + ExpressionAST = std::move(ExprAST); + } + + /// \returns a string containing the result of evaluating the expression in + /// this substitution, or an error if evaluation failed. + Expected getResult() const override; +}; + +//===----------------------------------------------------------------------===// +// Pattern handling code. +//===----------------------------------------------------------------------===// + +struct FileCheckDiag; + +/// Class holding the FileCheckPattern global state, shared by all patterns: +/// tables holding values of variables and whether they are defined or not at +/// any given time in the matching process. +class FileCheckPatternContext { + friend class FileCheckPattern; + +private: + /// When matching a given pattern, this holds the value of all the string + /// variables defined in previous patterns. In a pattern, only the last + /// definition for a given variable is recorded in this table. + /// Back-references are used for uses after any the other definition. + StringMap GlobalVariableTable; + + /// Map of all string variables defined so far. Used at parse time to detect + /// a name conflict between a numeric variable and a string variable when + /// the former is defined on a later line than the latter. + StringMap DefinedVariableTable; + + /// When matching a given pattern, this holds the pointers to the classes + /// representing the numeric variables defined in previous patterns. 
When + /// matching a pattern all definitions for that pattern are recorded in the + /// NumericVariableDefs table in the FileCheckPattern instance of that + /// pattern. + StringMap GlobalNumericVariableTable; + + /// Pointer to the class instance representing the @LINE pseudo variable for + /// easily updating its value. + FileCheckNumericVariable *LineVariable = nullptr; + + /// Vector holding pointers to all parsed numeric variables. Used to + /// automatically free them once they are guaranteed to no longer be used. + std::vector> NumericVariables; + + /// Vector holding pointers to all substitutions. Used to automatically free + /// them once they are guaranteed to no longer be used. + std::vector> Substitutions; + +public: + /// \returns the value of string variable \p VarName or an error if no such + /// variable has been defined. + Expected getPatternVarValue(StringRef VarName); + + /// Defines string and numeric variables from definitions given on the + /// command line, passed as a vector of [#]VAR=VAL strings in + /// \p CmdlineDefines. \returns an error list containing diagnostics against + /// \p SM for all definition parsing failures, if any, or Success otherwise. + Error defineCmdlineVariables(std::vector &CmdlineDefines, + SourceMgr &SM); + + /// Create @LINE pseudo variable. Value is set when pattern are being + /// matched. + void createLineVariable(); + + /// Undefines local variables (variables whose name does not start with a '$' + /// sign), i.e. removes them from GlobalVariableTable and from + /// GlobalNumericVariableTable and also clears the value of numeric + /// variables. + void clearLocalVars(); + +private: + /// Makes a new numeric variable and registers it for destruction when the + /// context is destroyed. + template + FileCheckNumericVariable *makeNumericVariable(Types... args); + + /// Makes a new string substitution and registers it for destruction when the + /// context is destroyed. 
+ FileCheckSubstitution *makeStringSubstitution(StringRef VarName, + size_t InsertIdx); + + /// Makes a new numeric substitution and registers it for destruction when + /// the context is destroyed. + FileCheckSubstitution * + makeNumericSubstitution(StringRef ExpressionStr, + std::unique_ptr ExpressionAST, + size_t InsertIdx); +}; + +/// Class to represent an error holding a diagnostic with location information +/// used when printing it. +class FileCheckErrorDiagnostic : public ErrorInfo { +private: + SMDiagnostic Diagnostic; + +public: + static char ID; + + FileCheckErrorDiagnostic(SMDiagnostic &&Diag) : Diagnostic(Diag) {} + + std::error_code convertToErrorCode() const override { + return inconvertibleErrorCode(); + } + + /// Print diagnostic associated with this error when printing the error. + void log(raw_ostream &OS) const override { Diagnostic.print(nullptr, OS); } + + static Error get(const SourceMgr &SM, SMLoc Loc, const Twine &ErrMsg) { + return make_error( + SM.GetMessage(Loc, SourceMgr::DK_Error, ErrMsg)); + } + + static Error get(const SourceMgr &SM, StringRef Buffer, const Twine &ErrMsg) { + return get(SM, SMLoc::getFromPointer(Buffer.data()), ErrMsg); + } +}; + +class FileCheckNotFoundError : public ErrorInfo { +public: + static char ID; + + std::error_code convertToErrorCode() const override { + return inconvertibleErrorCode(); + } + + /// Print diagnostic associated with this error when printing the error. + void log(raw_ostream &OS) const override { + OS << "String not found in input"; + } +}; + +class FileCheckPattern { + SMLoc PatternLoc; + + /// A fixed string to match as the pattern or empty if this pattern requires + /// a regex match. + StringRef FixedStr; + + /// A regex string to match as the pattern or empty if this pattern requires + /// a fixed string to match. + std::string RegExStr; + + /// Entries in this vector represent a substitution of a string variable or + /// an expression in the RegExStr regex at match time. 
For example, in the + /// case of a CHECK directive with the pattern "foo[[bar]]baz[[#N+1]]", + /// RegExStr will contain "foobaz" and we'll get two entries in this vector + /// that tells us to insert the value of string variable "bar" at offset 3 + /// and the value of expression "N+1" at offset 6. + std::vector Substitutions; + + /// Maps names of string variables defined in a pattern to the number of + /// their parenthesis group in RegExStr capturing their last definition. + /// + /// E.g. for the pattern "foo[[bar:.*]]baz([[bar]][[QUUX]][[bar:.*]])", + /// RegExStr will be "foo(.*)baz(\1(.*))" where is + /// the value captured for QUUX on the earlier line where it was defined, and + /// VariableDefs will map "bar" to the third parenthesis group which captures + /// the second definition of "bar". + /// + /// Note: uses std::map rather than StringMap to be able to get the key when + /// iterating over values. + std::map VariableDefs; + + /// Structure representing the definition of a numeric variable in a pattern. + /// It holds the pointer to the class representing the numeric variable whose + /// value is being defined and the number of the parenthesis group in + /// RegExStr to capture that value. + struct FileCheckNumericVariableMatch { + /// Pointer to class representing the numeric variable whose value is being + /// defined. + FileCheckNumericVariable *DefinedNumericVariable; + + /// Number of the parenthesis group in RegExStr that captures the value of + /// this numeric variable definition. + unsigned CaptureParenGroup; + }; + + /// Holds the number of the parenthesis group in RegExStr and pointer to the + /// corresponding FileCheckNumericVariable class instance of all numeric + /// variable definitions. Used to set the matched value of all those + /// variables. 
+ StringMap NumericVariableDefs; + + /// Pointer to a class instance holding the global state shared by all + /// patterns: + /// - separate tables with the values of live string and numeric variables + /// respectively at the start of any given CHECK line; + /// - table holding whether a string variable has been defined at any given + /// point during the parsing phase. + FileCheckPatternContext *Context; + + Check::FileCheckType CheckTy; + + /// Line number for this CHECK pattern or None if it is an implicit pattern. + /// Used to determine whether a variable definition is made on an earlier + /// line to the one with this CHECK. + Optional LineNumber; + +public: + FileCheckPattern(Check::FileCheckType Ty, FileCheckPatternContext *Context, + Optional Line = None) + : Context(Context), CheckTy(Ty), LineNumber(Line) {} + + /// \returns the location in source code. + SMLoc getLoc() const { return PatternLoc; } + + /// \returns the pointer to the global state for all patterns in this + /// FileCheck instance. + FileCheckPatternContext *getContext() const { return Context; } + + /// \returns whether \p C is a valid first character for a variable name. + static bool isValidVarNameStart(char C); + + /// Parsing information about a variable. + struct VariableProperties { + StringRef Name; + bool IsPseudo; + }; + + /// Parses the string at the start of \p Str for a variable name. \returns + /// a VariableProperties structure holding the variable name and whether it + /// is the name of a pseudo variable, or an error holding a diagnostic + /// against \p SM if parsing fail. If parsing was successful, also strips + /// \p Str from the variable name. + static Expected parseVariable(StringRef &Str, + const SourceMgr &SM); + /// Parses \p Expr for a numeric substitution block at line \p LineNumber, + /// or before input is parsed if \p LineNumber is None. 
Parameter + /// \p IsLegacyLineExpr indicates whether \p Expr should be a legacy @LINE + /// expression and \p Context points to the class instance holding the live + /// string and numeric variables. \returns a pointer to the class instance + /// representing the AST of the expression whose value must be substitued, or + /// an error holding a diagnostic against \p SM if parsing fails. If + /// substitution was successful, sets \p DefinedNumericVariable to point to + /// the class representing the numeric variable defined in this numeric + /// substitution block, or None if this block does not define any variable. + static Expected> + parseNumericSubstitutionBlock( + StringRef Expr, + Optional &DefinedNumericVariable, + bool IsLegacyLineExpr, Optional LineNumber, + FileCheckPatternContext *Context, const SourceMgr &SM); + /// Parses the pattern in \p PatternStr and initializes this FileCheckPattern + /// instance accordingly. + /// + /// \p Prefix provides which prefix is being matched, \p Req describes the + /// global options that influence the parsing such as whitespace + /// canonicalization, \p SM provides the SourceMgr used for error reports. + /// \returns true in case of an error, false otherwise. + bool parsePattern(StringRef PatternStr, StringRef Prefix, SourceMgr &SM, + const FileCheckRequest &Req); + /// Matches the pattern string against the input buffer \p Buffer + /// + /// \returns the position that is matched or an error indicating why matching + /// failed. If there is a match, updates \p MatchLen with the size of the + /// matched string. + /// + /// The GlobalVariableTable StringMap in the FileCheckPatternContext class + /// instance provides the current values of FileCheck string variables and + /// is updated if this match defines new values. Likewise, the + /// GlobalNumericVariableTable StringMap in the same class provides the + /// current values of FileCheck numeric variables and is updated if this + /// match defines new numeric values. 
+ Expected match(StringRef Buffer, size_t &MatchLen, + const SourceMgr &SM) const; + /// Prints the value of successful substitutions or the name of the undefined + /// string or numeric variables preventing a successful substitution. + void printSubstitutions(const SourceMgr &SM, StringRef Buffer, + SMRange MatchRange = None) const; + void printFuzzyMatch(const SourceMgr &SM, StringRef Buffer, + std::vector *Diags) const; + + bool hasVariable() const { + return !(Substitutions.empty() && VariableDefs.empty()); + } + + Check::FileCheckType getCheckTy() const { return CheckTy; } + + int getCount() const { return CheckTy.getCount(); } + +private: + bool AddRegExToRegEx(StringRef RS, unsigned &CurParen, SourceMgr &SM); + void AddBackrefToRegEx(unsigned BackrefNum); + /// Computes an arbitrary estimate for the quality of matching this pattern + /// at the start of \p Buffer; a distance of zero should correspond to a + /// perfect match. + unsigned computeMatchDistance(StringRef Buffer) const; + /// Finds the closing sequence of a regex variable usage or definition. + /// + /// \p Str has to point in the beginning of the definition (right after the + /// opening sequence). \p SM holds the SourceMgr used for error repporting. + /// \returns the offset of the closing sequence within Str, or npos if it + /// was not found. + size_t FindRegexVarEnd(StringRef Str, SourceMgr &SM); + + /// Parses \p Expr for the name of a numeric variable to be defined at line + /// \p LineNumber, or before input is parsed if \p LineNumber is None. + /// \returns a pointer to the class instance representing that variable, + /// creating it if needed, or an error holding a diagnostic against \p SM + /// should defining such a variable be invalid. 
+ static Expected parseNumericVariableDefinition( + StringRef &Expr, FileCheckPatternContext *Context, + Optional LineNumber, const SourceMgr &SM); + /// Parses \p Name as a (pseudo if \p IsPseudo is true) numeric variable use + /// at line \p LineNumber, or before input is parsed if \p LineNumber is + /// None. Parameter \p Context points to the class instance holding the live + /// string and numeric variables. \returns the pointer to the class instance + /// representing that variable if successful, or an error holding a + /// diagnostic against \p SM otherwise. + static Expected> + parseNumericVariableUse(StringRef Name, bool IsPseudo, + Optional LineNumber, + FileCheckPatternContext *Context, + const SourceMgr &SM); + enum class AllowedOperand { LineVar, Literal, Any }; + /// Parses \p Expr for use of a numeric operand at line \p LineNumber, or + /// before input is parsed if \p LineNumber is None. Accepts both literal + /// values and numeric variables, depending on the value of \p AO. Parameter + /// \p Context points to the class instance holding the live string and + /// numeric variables. \returns the class representing that operand in the + /// AST of the expression or an error holding a diagnostic against \p SM + /// otherwise. + static Expected> + parseNumericOperand(StringRef &Expr, AllowedOperand AO, + Optional LineNumber, + FileCheckPatternContext *Context, const SourceMgr &SM); + /// Parses \p Expr for a binary operation at line \p LineNumber, or before + /// input is parsed if \p LineNumber is None. The left operand of this binary + /// operation is given in \p LeftOp and \p IsLegacyLineExpr indicates whether + /// we are parsing a legacy @LINE expression. Parameter \p Context points to + /// the class instance holding the live string and numeric variables. + /// \returns the class representing the binary operation in the AST of the + /// expression, or an error holding a diagnostic against \p SM otherwise. 
+ static Expected> + parseBinop(StringRef &Expr, std::unique_ptr LeftOp, + bool IsLegacyLineExpr, Optional LineNumber, + FileCheckPatternContext *Context, const SourceMgr &SM); +}; + +//===----------------------------------------------------------------------===// +// Check Strings. +//===----------------------------------------------------------------------===// + +/// A check that we found in the input file. +struct FileCheckString { + /// The pattern to match. + FileCheckPattern Pat; + + /// Which prefix name this check matched. + StringRef Prefix; + + /// The location in the match file that the check string was specified. + SMLoc Loc; + + /// All of the strings that are disallowed from occurring between this match + /// string and the previous one (or start of file). + std::vector DagNotStrings; + + FileCheckString(const FileCheckPattern &P, StringRef S, SMLoc L) + : Pat(P), Prefix(S), Loc(L) {} + + /// Matches check string and its "not strings" and/or "dag strings". + size_t Check(const SourceMgr &SM, StringRef Buffer, bool IsLabelScanMode, + size_t &MatchLen, FileCheckRequest &Req, + std::vector *Diags) const; + + /// Verifies that there is a single line in the given \p Buffer. Errors are + /// reported against \p SM. + bool CheckNext(const SourceMgr &SM, StringRef Buffer) const; + /// Verifies that there is no newline in the given \p Buffer. Errors are + /// reported against \p SM. + bool CheckSame(const SourceMgr &SM, StringRef Buffer) const; + /// Verifies that none of the strings in \p NotStrings are found in the given + /// \p Buffer. Errors are reported against \p SM and diagnostics recorded in + /// \p Diags according to the verbosity level set in \p Req. + bool CheckNot(const SourceMgr &SM, StringRef Buffer, + const std::vector &NotStrings, + const FileCheckRequest &Req, + std::vector *Diags) const; + /// Matches "dag strings" and their mixed "not strings". 
+ size_t CheckDag(const SourceMgr &SM, StringRef Buffer, + std::vector &NotStrings, + const FileCheckRequest &Req, + std::vector *Diags) const; +}; + +} // namespace llvm + +#endif diff --git a/test/FileCheck/check-ignore-case.txt b/test/FileCheck/check-ignore-case.txt deleted file mode 100644 index 8721c3f0158..00000000000 --- a/test/FileCheck/check-ignore-case.txt +++ /dev/null @@ -1,45 +0,0 @@ -## Check that a full line is matched case insensitively. -# RUN: FileCheck --ignore-case --match-full-lines --check-prefix=FULL --input-file=%s %s - -## Check that a regular expression matches case insensitively. -# RUN: FileCheck --ignore-case --check-prefix=REGEX --input-file=%s %s - -## Check that a pattern from command line matches case insensitively. -# RUN: FileCheck --ignore-case --check-prefix=PAT --DPATTERN="THIS is the" --input-file=%s %s - -## Check that COUNT and NEXT work case insensitively. -# RUN: FileCheck --ignore-case --check-prefix=CNT --input-file=%s %s - -## Check that match on same line works case insensitively. -# RUN: FileCheck --ignore-case --check-prefix=LINE --input-file=%s %s - -## Check that option --implicit-not works case insensitively. 
-# RUN: sed '/^#/d' %s | FileCheck --implicit-check-not=sTrInG %s -# RUN: sed '/^#/d' %s | not FileCheck --ignore-case --implicit-check-not=sTrInG %s 2>&1 | FileCheck --check-prefix=ERROR %s - -this is the STRING to be matched - -# FULL: tHis iS The String TO be matched -# REGEX: s{{TRing}} -# PAT: [[PATTERN]] string - -Loop 1 -lOop 2 -loOp 3 -looP 4 -loop 5 -LOOP 6 -BREAK - -# CNT-COUNT-6: LOop {{[0-9]}} -# CNT-NOT: loop -# CNT-NEXT: break - -One Line To Match - -# LINE: {{o}}ne line -# LINE-SAME: {{t}}o match - -# ERROR: command line:1:{{[0-9]+]}}: error: CHECK-NOT: excluded string found in input -# ERROR-NEXT: -implicit-check-not='sTrInG' -# ERROR: note: found here diff --git a/utils/FileCheck/FileCheck.cpp b/utils/FileCheck/FileCheck.cpp index 4d2cd1930d6..8718be28ac9 100644 --- a/utils/FileCheck/FileCheck.cpp +++ b/utils/FileCheck/FileCheck.cpp @@ -1,656 +1,651 @@ -//===- FileCheck.cpp - Check that File's Contents match what is expected --===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// FileCheck does a line-by line check of a file that validates whether it -// contains the expected content. This is useful for regression tests etc. -// -// This program exits with an exit status of 2 on error, exit status of 0 if -// the file matched the expected contents, and exit status of 1 if it did not -// contain the expected contents. 
-// -//===----------------------------------------------------------------------===// - -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/InitLLVM.h" -#include "llvm/Support/Process.h" -#include "llvm/Support/WithColor.h" -#include "llvm/Support/raw_ostream.h" -#include "llvm/Support/FileCheck.h" -#include -using namespace llvm; - -static cl::extrahelp FileCheckOptsEnv( - "\nOptions are parsed from the environment variable FILECHECK_OPTS and\n" - "from the command line.\n"); - -static cl::opt - CheckFilename(cl::Positional, cl::desc(""), cl::Optional); - -static cl::opt - InputFilename("input-file", cl::desc("File to check (defaults to stdin)"), - cl::init("-"), cl::value_desc("filename")); - -static cl::list CheckPrefixes( - "check-prefix", - cl::desc("Prefix to use from check file (defaults to 'CHECK')")); -static cl::alias CheckPrefixesAlias( - "check-prefixes", cl::aliasopt(CheckPrefixes), cl::CommaSeparated, - cl::NotHidden, - cl::desc( - "Alias for -check-prefix permitting multiple comma separated values")); - -static cl::opt NoCanonicalizeWhiteSpace( - "strict-whitespace", - cl::desc("Do not treat all horizontal whitespace as equivalent")); - -static cl::opt IgnoreCase( - "ignore-case", - cl::desc("Use case-insensitive matching")); - -static cl::list ImplicitCheckNot( - "implicit-check-not", - cl::desc("Add an implicit negative check with this pattern to every\n" - "positive check. This can be used to ensure that no instances of\n" - "this pattern occur which are not matched by a positive pattern"), - cl::value_desc("pattern")); - -static cl::list - GlobalDefines("D", cl::AlwaysPrefix, - cl::desc("Define a variable to be used in capture patterns."), - cl::value_desc("VAR=VALUE")); - -static cl::opt AllowEmptyInput( - "allow-empty", cl::init(false), - cl::desc("Allow the input file to be empty. 
This is useful when making\n" - "checks that some error message does not occur, for example.")); - -static cl::opt MatchFullLines( - "match-full-lines", cl::init(false), - cl::desc("Require all positive matches to cover an entire input line.\n" - "Allows leading and trailing whitespace if --strict-whitespace\n" - "is not also passed.")); - -static cl::opt EnableVarScope( - "enable-var-scope", cl::init(false), - cl::desc("Enables scope for regex variables. Variables with names that\n" - "do not start with '$' will be reset at the beginning of\n" - "each CHECK-LABEL block.")); - -static cl::opt AllowDeprecatedDagOverlap( - "allow-deprecated-dag-overlap", cl::init(false), - cl::desc("Enable overlapping among matches in a group of consecutive\n" - "CHECK-DAG directives. This option is deprecated and is only\n" - "provided for convenience as old tests are migrated to the new\n" - "non-overlapping CHECK-DAG implementation.\n")); - -static cl::opt Verbose( - "v", cl::init(false), - cl::desc("Print directive pattern matches, or add them to the input dump\n" - "if enabled.\n")); - -static cl::opt VerboseVerbose( - "vv", cl::init(false), - cl::desc("Print information helpful in diagnosing internal FileCheck\n" - "issues, or add it to the input dump if enabled. 
Implies\n" - "-v.\n")); -static const char * DumpInputEnv = "FILECHECK_DUMP_INPUT_ON_FAILURE"; - -static cl::opt DumpInputOnFailure( - "dump-input-on-failure", - cl::init(std::getenv(DumpInputEnv) && *std::getenv(DumpInputEnv)), - cl::desc("Dump original input to stderr before failing.\n" - "The value can be also controlled using\n" - "FILECHECK_DUMP_INPUT_ON_FAILURE environment variable.\n" - "This option is deprecated in favor of -dump-input=fail.\n")); - -enum DumpInputValue { - DumpInputDefault, - DumpInputHelp, - DumpInputNever, - DumpInputFail, - DumpInputAlways -}; - -static cl::opt DumpInput( - "dump-input", cl::init(DumpInputDefault), - cl::desc("Dump input to stderr, adding annotations representing\n" - " currently enabled diagnostics\n"), - cl::value_desc("mode"), - cl::values(clEnumValN(DumpInputHelp, "help", - "Explain dump format and quit"), - clEnumValN(DumpInputNever, "never", "Never dump input"), - clEnumValN(DumpInputFail, "fail", "Dump input on failure"), - clEnumValN(DumpInputAlways, "always", "Always dump input"))); - -typedef cl::list::const_iterator prefix_iterator; - - - - - - - -static void DumpCommandLine(int argc, char **argv) { - errs() << "FileCheck command line: "; - for (int I = 0; I < argc; I++) - errs() << " " << argv[I]; - errs() << "\n"; -} - -struct MarkerStyle { - /// The starting char (before tildes) for marking the line. - char Lead; - /// What color to use for this annotation. - raw_ostream::Colors Color; - /// A note to follow the marker, or empty string if none. 
- std::string Note; - MarkerStyle() {} - MarkerStyle(char Lead, raw_ostream::Colors Color, - const std::string &Note = "") - : Lead(Lead), Color(Color), Note(Note) {} -}; - -static MarkerStyle GetMarker(FileCheckDiag::MatchType MatchTy) { - switch (MatchTy) { - case FileCheckDiag::MatchFoundAndExpected: - return MarkerStyle('^', raw_ostream::GREEN); - case FileCheckDiag::MatchFoundButExcluded: - return MarkerStyle('!', raw_ostream::RED, "error: no match expected"); - case FileCheckDiag::MatchFoundButWrongLine: - return MarkerStyle('!', raw_ostream::RED, "error: match on wrong line"); - case FileCheckDiag::MatchFoundButDiscarded: - return MarkerStyle('!', raw_ostream::CYAN, - "discard: overlaps earlier match"); - case FileCheckDiag::MatchNoneAndExcluded: - return MarkerStyle('X', raw_ostream::GREEN); - case FileCheckDiag::MatchNoneButExpected: - return MarkerStyle('X', raw_ostream::RED, "error: no match found"); - case FileCheckDiag::MatchFuzzy: - return MarkerStyle('?', raw_ostream::MAGENTA, "possible intended match"); - } - llvm_unreachable_internal("unexpected match type"); -} - -static void DumpInputAnnotationHelp(raw_ostream &OS) { - OS << "The following description was requested by -dump-input=help to\n" - << "explain the input annotations printed by -dump-input=always and\n" - << "-dump-input=fail:\n\n"; - - // Labels for input lines. - OS << " - "; - WithColor(OS, raw_ostream::SAVEDCOLOR, true) << "L:"; - OS << " labels line number L of the input file\n"; - - // Labels for annotation lines. - OS << " - "; - WithColor(OS, raw_ostream::SAVEDCOLOR, true) << "T:L"; - OS << " labels the only match result for a pattern of type T from " - << "line L of\n" - << " the check file\n"; - OS << " - "; - WithColor(OS, raw_ostream::SAVEDCOLOR, true) << "T:L'N"; - OS << " labels the Nth match result for a pattern of type T from line " - << "L of\n" - << " the check file\n"; - - // Markers on annotation lines. 
- OS << " - "; - WithColor(OS, raw_ostream::SAVEDCOLOR, true) << "^~~"; - OS << " marks good match (reported if -v)\n" - << " - "; - WithColor(OS, raw_ostream::SAVEDCOLOR, true) << "!~~"; - OS << " marks bad match, such as:\n" - << " - CHECK-NEXT on same line as previous match (error)\n" - << " - CHECK-NOT found (error)\n" - << " - CHECK-DAG overlapping match (discarded, reported if " - << "-vv)\n" - << " - "; - WithColor(OS, raw_ostream::SAVEDCOLOR, true) << "X~~"; - OS << " marks search range when no match is found, such as:\n" - << " - CHECK-NEXT not found (error)\n" - << " - CHECK-NOT not found (success, reported if -vv)\n" - << " - CHECK-DAG not found after discarded matches (error)\n" - << " - "; - WithColor(OS, raw_ostream::SAVEDCOLOR, true) << "?"; - OS << " marks fuzzy match when no match is found\n"; - - // Colors. - OS << " - colors "; - WithColor(OS, raw_ostream::GREEN, true) << "success"; - OS << ", "; - WithColor(OS, raw_ostream::RED, true) << "error"; - OS << ", "; - WithColor(OS, raw_ostream::MAGENTA, true) << "fuzzy match"; - OS << ", "; - WithColor(OS, raw_ostream::CYAN, true, false) << "discarded match"; - OS << ", "; - WithColor(OS, raw_ostream::CYAN, true, true) << "unmatched input"; - OS << "\n\n" - << "If you are not seeing color above or in input dumps, try: -color\n"; -} - -/// An annotation for a single input line. -struct InputAnnotation { - /// The check file line (one-origin indexing) where the directive that - /// produced this annotation is located. - unsigned CheckLine; - /// The index of the match result for this check. - unsigned CheckDiagIndex; - /// The label for this annotation. - std::string Label; - /// What input line (one-origin indexing) this annotation marks. This might - /// be different from the starting line of the original diagnostic if this is - /// a non-initial fragment of a diagnostic that has been broken across - /// multiple lines. 
- unsigned InputLine; - /// The column range (one-origin indexing, open end) in which to to mark the - /// input line. If InputEndCol is UINT_MAX, treat it as the last column - /// before the newline. - unsigned InputStartCol, InputEndCol; - /// The marker to use. - MarkerStyle Marker; - /// Whether this annotation represents a good match for an expected pattern. - bool FoundAndExpectedMatch; -}; - -/// Get an abbreviation for the check type. -std::string GetCheckTypeAbbreviation(Check::FileCheckType Ty) { - switch (Ty) { - case Check::CheckPlain: - if (Ty.getCount() > 1) - return "count"; - return "check"; - case Check::CheckNext: - return "next"; - case Check::CheckSame: - return "same"; - case Check::CheckNot: - return "not"; - case Check::CheckDAG: - return "dag"; - case Check::CheckLabel: - return "label"; - case Check::CheckEmpty: - return "empty"; - case Check::CheckEOF: - return "eof"; - case Check::CheckBadNot: - return "bad-not"; - case Check::CheckBadCount: - return "bad-count"; - case Check::CheckNone: - llvm_unreachable("invalid FileCheckType"); - } - llvm_unreachable("unknown FileCheckType"); -} - -static void BuildInputAnnotations(const std::vector &Diags, - std::vector &Annotations, - unsigned &LabelWidth) { - // How many diagnostics has the current check seen so far? - unsigned CheckDiagCount = 0; - // What's the widest label? - LabelWidth = 0; - for (auto DiagItr = Diags.begin(), DiagEnd = Diags.end(); DiagItr != DiagEnd; - ++DiagItr) { - InputAnnotation A; - - // Build label, which uniquely identifies this check result. 
- A.CheckLine = DiagItr->CheckLine; - llvm::raw_string_ostream Label(A.Label); - Label << GetCheckTypeAbbreviation(DiagItr->CheckTy) << ":" - << DiagItr->CheckLine; - A.CheckDiagIndex = UINT_MAX; - auto DiagNext = std::next(DiagItr); - if (DiagNext != DiagEnd && DiagItr->CheckTy == DiagNext->CheckTy && - DiagItr->CheckLine == DiagNext->CheckLine) - A.CheckDiagIndex = CheckDiagCount++; - else if (CheckDiagCount) { - A.CheckDiagIndex = CheckDiagCount; - CheckDiagCount = 0; - } - if (A.CheckDiagIndex != UINT_MAX) - Label << "'" << A.CheckDiagIndex; - else - A.CheckDiagIndex = 0; - Label.flush(); - LabelWidth = std::max((std::string::size_type)LabelWidth, A.Label.size()); - - A.Marker = GetMarker(DiagItr->MatchTy); - A.FoundAndExpectedMatch = - DiagItr->MatchTy == FileCheckDiag::MatchFoundAndExpected; - - // Compute the mark location, and break annotation into multiple - // annotations if it spans multiple lines. - A.InputLine = DiagItr->InputStartLine; - A.InputStartCol = DiagItr->InputStartCol; - if (DiagItr->InputStartLine == DiagItr->InputEndLine) { - // Sometimes ranges are empty in order to indicate a specific point, but - // that would mean nothing would be marked, so adjust the range to - // include the following character. - A.InputEndCol = - std::max(DiagItr->InputStartCol + 1, DiagItr->InputEndCol); - Annotations.push_back(A); - } else { - assert(DiagItr->InputStartLine < DiagItr->InputEndLine && - "expected input range not to be inverted"); - A.InputEndCol = UINT_MAX; - Annotations.push_back(A); - for (unsigned L = DiagItr->InputStartLine + 1, E = DiagItr->InputEndLine; - L <= E; ++L) { - // If a range ends before the first column on a line, then it has no - // characters on that line, so there's nothing to render. 
- if (DiagItr->InputEndCol == 1 && L == E) - break; - InputAnnotation B; - B.CheckLine = A.CheckLine; - B.CheckDiagIndex = A.CheckDiagIndex; - B.Label = A.Label; - B.InputLine = L; - B.Marker = A.Marker; - B.Marker.Lead = '~'; - B.Marker.Note = ""; - B.InputStartCol = 1; - if (L != E) - B.InputEndCol = UINT_MAX; - else - B.InputEndCol = DiagItr->InputEndCol; - B.FoundAndExpectedMatch = A.FoundAndExpectedMatch; - Annotations.push_back(B); - } - } - } -} - -static void DumpAnnotatedInput(raw_ostream &OS, const FileCheckRequest &Req, - StringRef InputFileText, - std::vector &Annotations, - unsigned LabelWidth) { - OS << "Full input was:\n<<<<<<\n"; - - // Sort annotations. - // - // First, sort in the order of input lines to make it easier to find relevant - // annotations while iterating input lines in the implementation below. - // FileCheck diagnostics are not always reported and recorded in the order of - // input lines due to, for example, CHECK-DAG and CHECK-NOT. - // - // Second, for annotations for the same input line, sort in the order of the - // FileCheck directive's line in the check file (where there's at most one - // directive per line) and then by the index of the match result for that - // directive. The rationale of this choice is that, for any input line, this - // sort establishes a total order of annotations that, with respect to match - // results, is consistent across multiple lines, thus making match results - // easier to track from one line to the next when they span multiple lines. - std::sort(Annotations.begin(), Annotations.end(), - [](const InputAnnotation &A, const InputAnnotation &B) { - if (A.InputLine != B.InputLine) - return A.InputLine < B.InputLine; - if (A.CheckLine != B.CheckLine) - return A.CheckLine < B.CheckLine; - // FIXME: Sometimes CHECK-LABEL reports its match twice with - // other diagnostics in between, and then diag index incrementing - // fails to work properly, and then this assert fails. 
We should - // suppress one of those diagnostics or do a better job of - // computing this index. For now, we just produce a redundant - // CHECK-LABEL annotation. - // assert(A.CheckDiagIndex != B.CheckDiagIndex && - // "expected diagnostic indices to be unique within a " - // " check line"); - return A.CheckDiagIndex < B.CheckDiagIndex; - }); - - // Compute the width of the label column. - const unsigned char *InputFilePtr = InputFileText.bytes_begin(), - *InputFileEnd = InputFileText.bytes_end(); - unsigned LineCount = InputFileText.count('\n'); - if (InputFileEnd[-1] != '\n') - ++LineCount; - unsigned LineNoWidth = std::log10(LineCount) + 1; - // +3 below adds spaces (1) to the left of the (right-aligned) line numbers - // on input lines and (2) to the right of the (left-aligned) labels on - // annotation lines so that input lines and annotation lines are more - // visually distinct. For example, the spaces on the annotation lines ensure - // that input line numbers and check directive line numbers never align - // horizontally. Those line numbers might not even be for the same file. - // One space would be enough to achieve that, but more makes it even easier - // to see. - LabelWidth = std::max(LabelWidth, LineNoWidth) + 3; - - // Print annotated input lines. - auto AnnotationItr = Annotations.begin(), AnnotationEnd = Annotations.end(); - for (unsigned Line = 1; - InputFilePtr != InputFileEnd || AnnotationItr != AnnotationEnd; - ++Line) { - const unsigned char *InputFileLine = InputFilePtr; - - // Print right-aligned line number. - WithColor(OS, raw_ostream::BLACK, true) - << format_decimal(Line, LabelWidth) << ": "; - - // For the case where -v and colors are enabled, find the annotations for - // good matches for expected patterns in order to highlight everything - // else in the line. There are no such annotations if -v is disabled. 
- std::vector FoundAndExpectedMatches; - if (Req.Verbose && WithColor(OS).colorsEnabled()) { - for (auto I = AnnotationItr; I != AnnotationEnd && I->InputLine == Line; - ++I) { - if (I->FoundAndExpectedMatch) - FoundAndExpectedMatches.push_back(*I); - } - } - - // Print numbered line with highlighting where there are no matches for - // expected patterns. - bool Newline = false; - { - WithColor COS(OS); - bool InMatch = false; - if (Req.Verbose) - COS.changeColor(raw_ostream::CYAN, true, true); - for (unsigned Col = 1; InputFilePtr != InputFileEnd && !Newline; ++Col) { - bool WasInMatch = InMatch; - InMatch = false; - for (auto M : FoundAndExpectedMatches) { - if (M.InputStartCol <= Col && Col < M.InputEndCol) { - InMatch = true; - break; - } - } - if (!WasInMatch && InMatch) - COS.resetColor(); - else if (WasInMatch && !InMatch) - COS.changeColor(raw_ostream::CYAN, true, true); - if (*InputFilePtr == '\n') - Newline = true; - else - COS << *InputFilePtr; - ++InputFilePtr; - } - } - OS << '\n'; - unsigned InputLineWidth = InputFilePtr - InputFileLine - Newline; - - // Print any annotations. - while (AnnotationItr != AnnotationEnd && - AnnotationItr->InputLine == Line) { - WithColor COS(OS, AnnotationItr->Marker.Color, true); - // The two spaces below are where the ": " appears on input lines. - COS << left_justify(AnnotationItr->Label, LabelWidth) << " "; - unsigned Col; - for (Col = 1; Col < AnnotationItr->InputStartCol; ++Col) - COS << ' '; - COS << AnnotationItr->Marker.Lead; - // If InputEndCol=UINT_MAX, stop at InputLineWidth. - for (++Col; Col < AnnotationItr->InputEndCol && Col <= InputLineWidth; - ++Col) - COS << '~'; - const std::string &Note = AnnotationItr->Marker.Note; - if (!Note.empty()) { - // Put the note at the end of the input line. If we were to instead - // put the note right after the marker, subsequent annotations for the - // same input line might appear to mark this note instead of the input - // line. 
- for (; Col <= InputLineWidth; ++Col) - COS << ' '; - COS << ' ' << Note; - } - COS << '\n'; - ++AnnotationItr; - } - } - - OS << ">>>>>>\n"; -} - -int main(int argc, char **argv) { - // Enable use of ANSI color codes because FileCheck is using them to - // highlight text. - llvm::sys::Process::UseANSIEscapeCodes(true); - - InitLLVM X(argc, argv); - cl::ParseCommandLineOptions(argc, argv, /*Overview*/ "", /*Errs*/ nullptr, - "FILECHECK_OPTS"); - if (DumpInput == DumpInputHelp) { - DumpInputAnnotationHelp(outs()); - return 0; - } - if (CheckFilename.empty()) { - errs() << " not specified\n"; - return 2; - } - - FileCheckRequest Req; - for (auto Prefix : CheckPrefixes) - Req.CheckPrefixes.push_back(Prefix); - - for (auto CheckNot : ImplicitCheckNot) - Req.ImplicitCheckNot.push_back(CheckNot); - - bool GlobalDefineError = false; - for (auto G : GlobalDefines) { - size_t EqIdx = G.find('='); - if (EqIdx == std::string::npos) { - errs() << "Missing equal sign in command-line definition '-D" << G - << "'\n"; - GlobalDefineError = true; - continue; - } - if (EqIdx == 0) { - errs() << "Missing variable name in command-line definition '-D" << G - << "'\n"; - GlobalDefineError = true; - continue; - } - Req.GlobalDefines.push_back(G); - } - if (GlobalDefineError) - return 2; - - Req.AllowEmptyInput = AllowEmptyInput; - Req.EnableVarScope = EnableVarScope; - Req.AllowDeprecatedDagOverlap = AllowDeprecatedDagOverlap; - Req.Verbose = Verbose; - Req.VerboseVerbose = VerboseVerbose; - Req.NoCanonicalizeWhiteSpace = NoCanonicalizeWhiteSpace; - Req.MatchFullLines = MatchFullLines; - Req.IgnoreCase = IgnoreCase; - - if (VerboseVerbose) - Req.Verbose = true; - - FileCheck FC(Req); - if (!FC.ValidateCheckPrefixes()) { - errs() << "Supplied check-prefix is invalid! 
Prefixes must be unique and " - "start with a letter and contain only alphanumeric characters, " - "hyphens and underscores\n"; - return 2; - } - - Regex PrefixRE = FC.buildCheckPrefixRegex(); - std::string REError; - if (!PrefixRE.isValid(REError)) { - errs() << "Unable to combine check-prefix strings into a prefix regular " - "expression! This is likely a bug in FileCheck's verification of " - "the check-prefix strings. Regular expression parsing failed " - "with the following error: " - << REError << "\n"; - return 2; - } - - SourceMgr SM; - - // Read the expected strings from the check file. - ErrorOr> CheckFileOrErr = - MemoryBuffer::getFileOrSTDIN(CheckFilename); - if (std::error_code EC = CheckFileOrErr.getError()) { - errs() << "Could not open check file '" << CheckFilename - << "': " << EC.message() << '\n'; - return 2; - } - MemoryBuffer &CheckFile = *CheckFileOrErr.get(); - - SmallString<4096> CheckFileBuffer; - StringRef CheckFileText = FC.CanonicalizeFile(CheckFile, CheckFileBuffer); - - SM.AddNewSourceBuffer(MemoryBuffer::getMemBuffer( - CheckFileText, CheckFile.getBufferIdentifier()), - SMLoc()); - - if (FC.readCheckFile(SM, CheckFileText, PrefixRE)) - return 2; - - // Open the file to check and add it to SourceMgr. 
- ErrorOr> InputFileOrErr = - MemoryBuffer::getFileOrSTDIN(InputFilename); - if (std::error_code EC = InputFileOrErr.getError()) { - errs() << "Could not open input file '" << InputFilename - << "': " << EC.message() << '\n'; - return 2; - } - MemoryBuffer &InputFile = *InputFileOrErr.get(); - - if (InputFile.getBufferSize() == 0 && !AllowEmptyInput) { - errs() << "FileCheck error: '" << InputFilename << "' is empty.\n"; - DumpCommandLine(argc, argv); - return 2; - } - - SmallString<4096> InputFileBuffer; - StringRef InputFileText = FC.CanonicalizeFile(InputFile, InputFileBuffer); - - SM.AddNewSourceBuffer(MemoryBuffer::getMemBuffer( - InputFileText, InputFile.getBufferIdentifier()), - SMLoc()); - - if (DumpInput == DumpInputDefault) - DumpInput = DumpInputOnFailure ? DumpInputFail : DumpInputNever; - - std::vector Diags; - int ExitCode = FC.checkInput(SM, InputFileText, - DumpInput == DumpInputNever ? nullptr : &Diags) - ? EXIT_SUCCESS - : 1; - if (DumpInput == DumpInputAlways || - (ExitCode == 1 && DumpInput == DumpInputFail)) { - errs() << "\n" - << "Input file: " - << (InputFilename == "-" ? "" : InputFilename.getValue()) - << "\n" - << "Check file: " << CheckFilename << "\n" - << "\n" - << "-dump-input=help describes the format of the following dump.\n" - << "\n"; - std::vector Annotations; - unsigned LabelWidth; - BuildInputAnnotations(Diags, Annotations, LabelWidth); - DumpAnnotatedInput(errs(), Req, InputFileText, Annotations, LabelWidth); - } - - return ExitCode; -} +//===- FileCheck.cpp - Check that File's Contents match what is expected --===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// FileCheck does a line-by line check of a file that validates whether it +// contains the expected content. 
This is useful for regression tests etc. +// +// This program exits with an exit status of 2 on error, exit status of 0 if +// the file matched the expected contents, and exit status of 1 if it did not +// contain the expected contents. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/InitLLVM.h" +#include "llvm/Support/Process.h" +#include "llvm/Support/WithColor.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Support/FileCheck.h" +#include +using namespace llvm; + +static cl::extrahelp FileCheckOptsEnv( + "\nOptions are parsed from the environment variable FILECHECK_OPTS and\n" + "from the command line.\n"); + +static cl::opt + CheckFilename(cl::Positional, cl::desc(""), cl::Optional); + +static cl::opt + InputFilename("input-file", cl::desc("File to check (defaults to stdin)"), + cl::init("-"), cl::value_desc("filename")); + +static cl::list CheckPrefixes( + "check-prefix", + cl::desc("Prefix to use from check file (defaults to 'CHECK')")); +static cl::alias CheckPrefixesAlias( + "check-prefixes", cl::aliasopt(CheckPrefixes), cl::CommaSeparated, + cl::NotHidden, + cl::desc( + "Alias for -check-prefix permitting multiple comma separated values")); + +static cl::opt NoCanonicalizeWhiteSpace( + "strict-whitespace", + cl::desc("Do not treat all horizontal whitespace as equivalent")); + +static cl::list ImplicitCheckNot( + "implicit-check-not", + cl::desc("Add an implicit negative check with this pattern to every\n" + "positive check. This can be used to ensure that no instances of\n" + "this pattern occur which are not matched by a positive pattern"), + cl::value_desc("pattern")); + +static cl::list + GlobalDefines("D", cl::AlwaysPrefix, + cl::desc("Define a variable to be used in capture patterns."), + cl::value_desc("VAR=VALUE")); + +static cl::opt AllowEmptyInput( + "allow-empty", cl::init(false), + cl::desc("Allow the input file to be empty. 
This is useful when making\n" + "checks that some error message does not occur, for example.")); + +static cl::opt MatchFullLines( + "match-full-lines", cl::init(false), + cl::desc("Require all positive matches to cover an entire input line.\n" + "Allows leading and trailing whitespace if --strict-whitespace\n" + "is not also passed.")); + +static cl::opt EnableVarScope( + "enable-var-scope", cl::init(false), + cl::desc("Enables scope for regex variables. Variables with names that\n" + "do not start with '$' will be reset at the beginning of\n" + "each CHECK-LABEL block.")); + +static cl::opt AllowDeprecatedDagOverlap( + "allow-deprecated-dag-overlap", cl::init(false), + cl::desc("Enable overlapping among matches in a group of consecutive\n" + "CHECK-DAG directives. This option is deprecated and is only\n" + "provided for convenience as old tests are migrated to the new\n" + "non-overlapping CHECK-DAG implementation.\n")); + +static cl::opt Verbose( + "v", cl::init(false), + cl::desc("Print directive pattern matches, or add them to the input dump\n" + "if enabled.\n")); + +static cl::opt VerboseVerbose( + "vv", cl::init(false), + cl::desc("Print information helpful in diagnosing internal FileCheck\n" + "issues, or add it to the input dump if enabled. 
Implies\n" + "-v.\n")); +static const char * DumpInputEnv = "FILECHECK_DUMP_INPUT_ON_FAILURE"; + +static cl::opt DumpInputOnFailure( + "dump-input-on-failure", + cl::init(std::getenv(DumpInputEnv) && *std::getenv(DumpInputEnv)), + cl::desc("Dump original input to stderr before failing.\n" + "The value can be also controlled using\n" + "FILECHECK_DUMP_INPUT_ON_FAILURE environment variable.\n" + "This option is deprecated in favor of -dump-input=fail.\n")); + +enum DumpInputValue { + DumpInputDefault, + DumpInputHelp, + DumpInputNever, + DumpInputFail, + DumpInputAlways +}; + +static cl::opt DumpInput( + "dump-input", cl::init(DumpInputDefault), + cl::desc("Dump input to stderr, adding annotations representing\n" + " currently enabled diagnostics\n"), + cl::value_desc("mode"), + cl::values(clEnumValN(DumpInputHelp, "help", + "Explain dump format and quit"), + clEnumValN(DumpInputNever, "never", "Never dump input"), + clEnumValN(DumpInputFail, "fail", "Dump input on failure"), + clEnumValN(DumpInputAlways, "always", "Always dump input"))); + +typedef cl::list::const_iterator prefix_iterator; + + + + + + + +static void DumpCommandLine(int argc, char **argv) { + errs() << "FileCheck command line: "; + for (int I = 0; I < argc; I++) + errs() << " " << argv[I]; + errs() << "\n"; +} + +struct MarkerStyle { + /// The starting char (before tildes) for marking the line. + char Lead; + /// What color to use for this annotation. + raw_ostream::Colors Color; + /// A note to follow the marker, or empty string if none. 
+ std::string Note; + MarkerStyle() {} + MarkerStyle(char Lead, raw_ostream::Colors Color, + const std::string &Note = "") + : Lead(Lead), Color(Color), Note(Note) {} +}; + +static MarkerStyle GetMarker(FileCheckDiag::MatchType MatchTy) { + switch (MatchTy) { + case FileCheckDiag::MatchFoundAndExpected: + return MarkerStyle('^', raw_ostream::GREEN); + case FileCheckDiag::MatchFoundButExcluded: + return MarkerStyle('!', raw_ostream::RED, "error: no match expected"); + case FileCheckDiag::MatchFoundButWrongLine: + return MarkerStyle('!', raw_ostream::RED, "error: match on wrong line"); + case FileCheckDiag::MatchFoundButDiscarded: + return MarkerStyle('!', raw_ostream::CYAN, + "discard: overlaps earlier match"); + case FileCheckDiag::MatchNoneAndExcluded: + return MarkerStyle('X', raw_ostream::GREEN); + case FileCheckDiag::MatchNoneButExpected: + return MarkerStyle('X', raw_ostream::RED, "error: no match found"); + case FileCheckDiag::MatchFuzzy: + return MarkerStyle('?', raw_ostream::MAGENTA, "possible intended match"); + } + llvm_unreachable_internal("unexpected match type"); +} + +static void DumpInputAnnotationHelp(raw_ostream &OS) { + OS << "The following description was requested by -dump-input=help to\n" + << "explain the input annotations printed by -dump-input=always and\n" + << "-dump-input=fail:\n\n"; + + // Labels for input lines. + OS << " - "; + WithColor(OS, raw_ostream::SAVEDCOLOR, true) << "L:"; + OS << " labels line number L of the input file\n"; + + // Labels for annotation lines. + OS << " - "; + WithColor(OS, raw_ostream::SAVEDCOLOR, true) << "T:L"; + OS << " labels the only match result for a pattern of type T from " + << "line L of\n" + << " the check file\n"; + OS << " - "; + WithColor(OS, raw_ostream::SAVEDCOLOR, true) << "T:L'N"; + OS << " labels the Nth match result for a pattern of type T from line " + << "L of\n" + << " the check file\n"; + + // Markers on annotation lines. 
+ OS << " - "; + WithColor(OS, raw_ostream::SAVEDCOLOR, true) << "^~~"; + OS << " marks good match (reported if -v)\n" + << " - "; + WithColor(OS, raw_ostream::SAVEDCOLOR, true) << "!~~"; + OS << " marks bad match, such as:\n" + << " - CHECK-NEXT on same line as previous match (error)\n" + << " - CHECK-NOT found (error)\n" + << " - CHECK-DAG overlapping match (discarded, reported if " + << "-vv)\n" + << " - "; + WithColor(OS, raw_ostream::SAVEDCOLOR, true) << "X~~"; + OS << " marks search range when no match is found, such as:\n" + << " - CHECK-NEXT not found (error)\n" + << " - CHECK-NOT not found (success, reported if -vv)\n" + << " - CHECK-DAG not found after discarded matches (error)\n" + << " - "; + WithColor(OS, raw_ostream::SAVEDCOLOR, true) << "?"; + OS << " marks fuzzy match when no match is found\n"; + + // Colors. + OS << " - colors "; + WithColor(OS, raw_ostream::GREEN, true) << "success"; + OS << ", "; + WithColor(OS, raw_ostream::RED, true) << "error"; + OS << ", "; + WithColor(OS, raw_ostream::MAGENTA, true) << "fuzzy match"; + OS << ", "; + WithColor(OS, raw_ostream::CYAN, true, false) << "discarded match"; + OS << ", "; + WithColor(OS, raw_ostream::CYAN, true, true) << "unmatched input"; + OS << "\n\n" + << "If you are not seeing color above or in input dumps, try: -color\n"; +} + +/// An annotation for a single input line. +struct InputAnnotation { + /// The check file line (one-origin indexing) where the directive that + /// produced this annotation is located. + unsigned CheckLine; + /// The index of the match result for this check. + unsigned CheckDiagIndex; + /// The label for this annotation. + std::string Label; + /// What input line (one-origin indexing) this annotation marks. This might + /// be different from the starting line of the original diagnostic if this is + /// a non-initial fragment of a diagnostic that has been broken across + /// multiple lines. 
+ unsigned InputLine; + /// The column range (one-origin indexing, open end) in which to to mark the + /// input line. If InputEndCol is UINT_MAX, treat it as the last column + /// before the newline. + unsigned InputStartCol, InputEndCol; + /// The marker to use. + MarkerStyle Marker; + /// Whether this annotation represents a good match for an expected pattern. + bool FoundAndExpectedMatch; +}; + +/// Get an abbreviation for the check type. +std::string GetCheckTypeAbbreviation(Check::FileCheckType Ty) { + switch (Ty) { + case Check::CheckPlain: + if (Ty.getCount() > 1) + return "count"; + return "check"; + case Check::CheckNext: + return "next"; + case Check::CheckSame: + return "same"; + case Check::CheckNot: + return "not"; + case Check::CheckDAG: + return "dag"; + case Check::CheckLabel: + return "label"; + case Check::CheckEmpty: + return "empty"; + case Check::CheckEOF: + return "eof"; + case Check::CheckBadNot: + return "bad-not"; + case Check::CheckBadCount: + return "bad-count"; + case Check::CheckNone: + llvm_unreachable("invalid FileCheckType"); + } + llvm_unreachable("unknown FileCheckType"); +} + +static void BuildInputAnnotations(const std::vector &Diags, + std::vector &Annotations, + unsigned &LabelWidth) { + // How many diagnostics has the current check seen so far? + unsigned CheckDiagCount = 0; + // What's the widest label? + LabelWidth = 0; + for (auto DiagItr = Diags.begin(), DiagEnd = Diags.end(); DiagItr != DiagEnd; + ++DiagItr) { + InputAnnotation A; + + // Build label, which uniquely identifies this check result. 
+ A.CheckLine = DiagItr->CheckLine; + llvm::raw_string_ostream Label(A.Label); + Label << GetCheckTypeAbbreviation(DiagItr->CheckTy) << ":" + << DiagItr->CheckLine; + A.CheckDiagIndex = UINT_MAX; + auto DiagNext = std::next(DiagItr); + if (DiagNext != DiagEnd && DiagItr->CheckTy == DiagNext->CheckTy && + DiagItr->CheckLine == DiagNext->CheckLine) + A.CheckDiagIndex = CheckDiagCount++; + else if (CheckDiagCount) { + A.CheckDiagIndex = CheckDiagCount; + CheckDiagCount = 0; + } + if (A.CheckDiagIndex != UINT_MAX) + Label << "'" << A.CheckDiagIndex; + else + A.CheckDiagIndex = 0; + Label.flush(); + LabelWidth = std::max((std::string::size_type)LabelWidth, A.Label.size()); + + A.Marker = GetMarker(DiagItr->MatchTy); + A.FoundAndExpectedMatch = + DiagItr->MatchTy == FileCheckDiag::MatchFoundAndExpected; + + // Compute the mark location, and break annotation into multiple + // annotations if it spans multiple lines. + A.InputLine = DiagItr->InputStartLine; + A.InputStartCol = DiagItr->InputStartCol; + if (DiagItr->InputStartLine == DiagItr->InputEndLine) { + // Sometimes ranges are empty in order to indicate a specific point, but + // that would mean nothing would be marked, so adjust the range to + // include the following character. + A.InputEndCol = + std::max(DiagItr->InputStartCol + 1, DiagItr->InputEndCol); + Annotations.push_back(A); + } else { + assert(DiagItr->InputStartLine < DiagItr->InputEndLine && + "expected input range not to be inverted"); + A.InputEndCol = UINT_MAX; + Annotations.push_back(A); + for (unsigned L = DiagItr->InputStartLine + 1, E = DiagItr->InputEndLine; + L <= E; ++L) { + // If a range ends before the first column on a line, then it has no + // characters on that line, so there's nothing to render. 
+ if (DiagItr->InputEndCol == 1 && L == E) + break; + InputAnnotation B; + B.CheckLine = A.CheckLine; + B.CheckDiagIndex = A.CheckDiagIndex; + B.Label = A.Label; + B.InputLine = L; + B.Marker = A.Marker; + B.Marker.Lead = '~'; + B.Marker.Note = ""; + B.InputStartCol = 1; + if (L != E) + B.InputEndCol = UINT_MAX; + else + B.InputEndCol = DiagItr->InputEndCol; + B.FoundAndExpectedMatch = A.FoundAndExpectedMatch; + Annotations.push_back(B); + } + } + } +} + +static void DumpAnnotatedInput(raw_ostream &OS, const FileCheckRequest &Req, + StringRef InputFileText, + std::vector &Annotations, + unsigned LabelWidth) { + OS << "Full input was:\n<<<<<<\n"; + + // Sort annotations. + // + // First, sort in the order of input lines to make it easier to find relevant + // annotations while iterating input lines in the implementation below. + // FileCheck diagnostics are not always reported and recorded in the order of + // input lines due to, for example, CHECK-DAG and CHECK-NOT. + // + // Second, for annotations for the same input line, sort in the order of the + // FileCheck directive's line in the check file (where there's at most one + // directive per line) and then by the index of the match result for that + // directive. The rationale of this choice is that, for any input line, this + // sort establishes a total order of annotations that, with respect to match + // results, is consistent across multiple lines, thus making match results + // easier to track from one line to the next when they span multiple lines. + std::sort(Annotations.begin(), Annotations.end(), + [](const InputAnnotation &A, const InputAnnotation &B) { + if (A.InputLine != B.InputLine) + return A.InputLine < B.InputLine; + if (A.CheckLine != B.CheckLine) + return A.CheckLine < B.CheckLine; + // FIXME: Sometimes CHECK-LABEL reports its match twice with + // other diagnostics in between, and then diag index incrementing + // fails to work properly, and then this assert fails. 
We should + // suppress one of those diagnostics or do a better job of + // computing this index. For now, we just produce a redundant + // CHECK-LABEL annotation. + // assert(A.CheckDiagIndex != B.CheckDiagIndex && + // "expected diagnostic indices to be unique within a " + // " check line"); + return A.CheckDiagIndex < B.CheckDiagIndex; + }); + + // Compute the width of the label column. + const unsigned char *InputFilePtr = InputFileText.bytes_begin(), + *InputFileEnd = InputFileText.bytes_end(); + unsigned LineCount = InputFileText.count('\n'); + if (InputFileEnd[-1] != '\n') + ++LineCount; + unsigned LineNoWidth = std::log10(LineCount) + 1; + // +3 below adds spaces (1) to the left of the (right-aligned) line numbers + // on input lines and (2) to the right of the (left-aligned) labels on + // annotation lines so that input lines and annotation lines are more + // visually distinct. For example, the spaces on the annotation lines ensure + // that input line numbers and check directive line numbers never align + // horizontally. Those line numbers might not even be for the same file. + // One space would be enough to achieve that, but more makes it even easier + // to see. + LabelWidth = std::max(LabelWidth, LineNoWidth) + 3; + + // Print annotated input lines. + auto AnnotationItr = Annotations.begin(), AnnotationEnd = Annotations.end(); + for (unsigned Line = 1; + InputFilePtr != InputFileEnd || AnnotationItr != AnnotationEnd; + ++Line) { + const unsigned char *InputFileLine = InputFilePtr; + + // Print right-aligned line number. + WithColor(OS, raw_ostream::BLACK, true) + << format_decimal(Line, LabelWidth) << ": "; + + // For the case where -v and colors are enabled, find the annotations for + // good matches for expected patterns in order to highlight everything + // else in the line. There are no such annotations if -v is disabled. 
+ std::vector FoundAndExpectedMatches; + if (Req.Verbose && WithColor(OS).colorsEnabled()) { + for (auto I = AnnotationItr; I != AnnotationEnd && I->InputLine == Line; + ++I) { + if (I->FoundAndExpectedMatch) + FoundAndExpectedMatches.push_back(*I); + } + } + + // Print numbered line with highlighting where there are no matches for + // expected patterns. + bool Newline = false; + { + WithColor COS(OS); + bool InMatch = false; + if (Req.Verbose) + COS.changeColor(raw_ostream::CYAN, true, true); + for (unsigned Col = 1; InputFilePtr != InputFileEnd && !Newline; ++Col) { + bool WasInMatch = InMatch; + InMatch = false; + for (auto M : FoundAndExpectedMatches) { + if (M.InputStartCol <= Col && Col < M.InputEndCol) { + InMatch = true; + break; + } + } + if (!WasInMatch && InMatch) + COS.resetColor(); + else if (WasInMatch && !InMatch) + COS.changeColor(raw_ostream::CYAN, true, true); + if (*InputFilePtr == '\n') + Newline = true; + else + COS << *InputFilePtr; + ++InputFilePtr; + } + } + OS << '\n'; + unsigned InputLineWidth = InputFilePtr - InputFileLine - Newline; + + // Print any annotations. + while (AnnotationItr != AnnotationEnd && + AnnotationItr->InputLine == Line) { + WithColor COS(OS, AnnotationItr->Marker.Color, true); + // The two spaces below are where the ": " appears on input lines. + COS << left_justify(AnnotationItr->Label, LabelWidth) << " "; + unsigned Col; + for (Col = 1; Col < AnnotationItr->InputStartCol; ++Col) + COS << ' '; + COS << AnnotationItr->Marker.Lead; + // If InputEndCol=UINT_MAX, stop at InputLineWidth. + for (++Col; Col < AnnotationItr->InputEndCol && Col <= InputLineWidth; + ++Col) + COS << '~'; + const std::string &Note = AnnotationItr->Marker.Note; + if (!Note.empty()) { + // Put the note at the end of the input line. If we were to instead + // put the note right after the marker, subsequent annotations for the + // same input line might appear to mark this note instead of the input + // line. 
+ for (; Col <= InputLineWidth; ++Col) + COS << ' '; + COS << ' ' << Note; + } + COS << '\n'; + ++AnnotationItr; + } + } + + OS << ">>>>>>\n"; +} + +int main(int argc, char **argv) { + // Enable use of ANSI color codes because FileCheck is using them to + // highlight text. + llvm::sys::Process::UseANSIEscapeCodes(true); + + InitLLVM X(argc, argv); + cl::ParseCommandLineOptions(argc, argv, /*Overview*/ "", /*Errs*/ nullptr, + "FILECHECK_OPTS"); + if (DumpInput == DumpInputHelp) { + DumpInputAnnotationHelp(outs()); + return 0; + } + if (CheckFilename.empty()) { + errs() << " not specified\n"; + return 2; + } + + FileCheckRequest Req; + for (auto Prefix : CheckPrefixes) + Req.CheckPrefixes.push_back(Prefix); + + for (auto CheckNot : ImplicitCheckNot) + Req.ImplicitCheckNot.push_back(CheckNot); + + bool GlobalDefineError = false; + for (auto G : GlobalDefines) { + size_t EqIdx = G.find('='); + if (EqIdx == std::string::npos) { + errs() << "Missing equal sign in command-line definition '-D" << G + << "'\n"; + GlobalDefineError = true; + continue; + } + if (EqIdx == 0) { + errs() << "Missing variable name in command-line definition '-D" << G + << "'\n"; + GlobalDefineError = true; + continue; + } + Req.GlobalDefines.push_back(G); + } + if (GlobalDefineError) + return 2; + + Req.AllowEmptyInput = AllowEmptyInput; + Req.EnableVarScope = EnableVarScope; + Req.AllowDeprecatedDagOverlap = AllowDeprecatedDagOverlap; + Req.Verbose = Verbose; + Req.VerboseVerbose = VerboseVerbose; + Req.NoCanonicalizeWhiteSpace = NoCanonicalizeWhiteSpace; + Req.MatchFullLines = MatchFullLines; + + if (VerboseVerbose) + Req.Verbose = true; + + FileCheck FC(Req); + if (!FC.ValidateCheckPrefixes()) { + errs() << "Supplied check-prefix is invalid! 
Prefixes must be unique and " + "start with a letter and contain only alphanumeric characters, " + "hyphens and underscores\n"; + return 2; + } + + Regex PrefixRE = FC.buildCheckPrefixRegex(); + std::string REError; + if (!PrefixRE.isValid(REError)) { + errs() << "Unable to combine check-prefix strings into a prefix regular " + "expression! This is likely a bug in FileCheck's verification of " + "the check-prefix strings. Regular expression parsing failed " + "with the following error: " + << REError << "\n"; + return 2; + } + + SourceMgr SM; + + // Read the expected strings from the check file. + ErrorOr> CheckFileOrErr = + MemoryBuffer::getFileOrSTDIN(CheckFilename); + if (std::error_code EC = CheckFileOrErr.getError()) { + errs() << "Could not open check file '" << CheckFilename + << "': " << EC.message() << '\n'; + return 2; + } + MemoryBuffer &CheckFile = *CheckFileOrErr.get(); + + SmallString<4096> CheckFileBuffer; + StringRef CheckFileText = FC.CanonicalizeFile(CheckFile, CheckFileBuffer); + + SM.AddNewSourceBuffer(MemoryBuffer::getMemBuffer( + CheckFileText, CheckFile.getBufferIdentifier()), + SMLoc()); + + if (FC.readCheckFile(SM, CheckFileText, PrefixRE)) + return 2; + + // Open the file to check and add it to SourceMgr. 
+ ErrorOr> InputFileOrErr = + MemoryBuffer::getFileOrSTDIN(InputFilename); + if (std::error_code EC = InputFileOrErr.getError()) { + errs() << "Could not open input file '" << InputFilename + << "': " << EC.message() << '\n'; + return 2; + } + MemoryBuffer &InputFile = *InputFileOrErr.get(); + + if (InputFile.getBufferSize() == 0 && !AllowEmptyInput) { + errs() << "FileCheck error: '" << InputFilename << "' is empty.\n"; + DumpCommandLine(argc, argv); + return 2; + } + + SmallString<4096> InputFileBuffer; + StringRef InputFileText = FC.CanonicalizeFile(InputFile, InputFileBuffer); + + SM.AddNewSourceBuffer(MemoryBuffer::getMemBuffer( + InputFileText, InputFile.getBufferIdentifier()), + SMLoc()); + + if (DumpInput == DumpInputDefault) + DumpInput = DumpInputOnFailure ? DumpInputFail : DumpInputNever; + + std::vector Diags; + int ExitCode = FC.checkInput(SM, InputFileText, + DumpInput == DumpInputNever ? nullptr : &Diags) + ? EXIT_SUCCESS + : 1; + if (DumpInput == DumpInputAlways || + (ExitCode == 1 && DumpInput == DumpInputFail)) { + errs() << "\n" + << "Input file: " + << (InputFilename == "-" ? "" : InputFilename.getValue()) + << "\n" + << "Check file: " << CheckFilename << "\n" + << "\n" + << "-dump-input=help describes the format of the following dump.\n" + << "\n"; + std::vector Annotations; + unsigned LabelWidth; + BuildInputAnnotations(Diags, Annotations, LabelWidth); + DumpAnnotatedInput(errs(), Req, InputFileText, Annotations, LabelWidth); + } + + return ExitCode; +} -- 2.40.0