From 1b6d9ac9fa8e50387b28694d74d8ce149dd4c379 Mon Sep 17 00:00:00 2001 From: Jordan Rupprecht Date: Wed, 16 Oct 2019 22:31:16 +0000 Subject: [PATCH] [support] GlobPattern: add support for `\` and `[!...]`, and allow `]` in more places Summary: Update GlobPattern in libSupport to handle a few more cases. It does not fully match the `fnmatch` used by GNU objcopy since named character classes (e.g. `[[:digit:]]`) are not supported, but this should support most existing use cases (mostly just `*` is what's used anyway). This will be used to implement the `--wildcard` flag in llvm-objcopy to be more compatible with GNU objcopy. This is split off of D66613 to land the libSupport changes separately. The llvm-objcopy part will land soon. Reviewers: jhenderson, MaskRay, evgeny777, espindola, alexshap Reviewed By: MaskRay Subscribers: nickdesaulniers, emaste, arichardson, hiraditya, jakehehrlich, abrachet, seiya, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D66613 undo objcopy changes to make this libsupport only git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@375051 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/Support/GlobPattern.h | 2 +- lib/Support/GlobPattern.cpp | 23 +++-- unittests/Support/GlobPatternTest.cpp | 134 ++++++++++++++++++-------- 3 files changed, 113 insertions(+), 46 deletions(-) diff --git a/include/llvm/Support/GlobPattern.h b/include/llvm/Support/GlobPattern.h index 66a4cd94c12..0098ac65fd3 100644 --- a/include/llvm/Support/GlobPattern.h +++ b/include/llvm/Support/GlobPattern.h @@ -21,7 +21,7 @@ #include // This class represents a glob pattern. Supported metacharacters -// are "*", "?", "[]" and "[^]". +// are "*", "?", "\", "[]", "[^]", and "[!]". 
namespace llvm { class BitVector; template class ArrayRef; diff --git a/lib/Support/GlobPattern.cpp b/lib/Support/GlobPattern.cpp index 6011be86d77..8dae6941ec7 100644 --- a/lib/Support/GlobPattern.cpp +++ b/lib/Support/GlobPattern.cpp @@ -19,7 +19,7 @@ using namespace llvm; static bool hasWildcard(StringRef S) { - return S.find_first_of("?*[") != StringRef::npos; + return S.find_first_of("?*[\\") != StringRef::npos; } // Expands character ranges and returns a bitmap. @@ -60,8 +60,9 @@ static Expected expand(StringRef S, StringRef Original) { } // This is a scanner for the glob pattern. -// A glob pattern token is one of "*", "?", "[]", "[^]" -// (which is a negative form of "[]"), or a non-meta character. +// A glob pattern token is one of "*", "?", "\", "[]", "[^]" +// (which is a negative form of "[]"), "[!]" (which is +// equivalent to "[^]"), or a non-meta character. // This function returns the first token in S. static Expected scan(StringRef &S, StringRef Original) { switch (S[0]) { @@ -74,14 +75,16 @@ static Expected scan(StringRef &S, StringRef Original) { S = S.substr(1); return BitVector(256, true); case '[': { - size_t End = S.find(']', 1); + // ']' is allowed as the first character of a character class. '[]' is + // invalid. So, just skip the first character. + size_t End = S.find(']', 2); if (End == StringRef::npos) return make_error("invalid glob pattern: " + Original, errc::invalid_argument); StringRef Chars = S.substr(1, End - 1); S = S.substr(End + 1); - if (Chars.startswith("^")) { + if (Chars.startswith("^") || Chars.startswith("!")) { Expected BV = expand(Chars.substr(1), Original); if (!BV) return BV.takeError(); @@ -89,6 +92,11 @@ static Expected scan(StringRef &S, StringRef Original) { } return expand(Chars, Original); } + case '\\': + // Eat this character and fall through below to treat it like a non-meta + // character. 
+ S = S.substr(1); + LLVM_FALLTHROUGH; default: BitVector BV(256, false); BV[(uint8_t)S[0]] = true; @@ -107,8 +115,9 @@ Expected GlobPattern::create(StringRef S) { return Pat; } - // S is something like "foo*". We can use startswith(). - if (S.endswith("*") && !hasWildcard(S.drop_back())) { + // S is something like "foo*", and the "*" is not escaped. We can use + // startswith(). + if (S.endswith("*") && !S.endswith("\\*") && !hasWildcard(S.drop_back())) { Pat.Prefix = S.drop_back(); return Pat; } diff --git a/unittests/Support/GlobPatternTest.cpp b/unittests/Support/GlobPatternTest.cpp index 113b20e047e..17d60b2b850 100644 --- a/unittests/Support/GlobPatternTest.cpp +++ b/unittests/Support/GlobPatternTest.cpp @@ -14,57 +14,115 @@ namespace { class GlobPatternTest : public ::testing::Test {}; -TEST_F(GlobPatternTest, Basics) { +TEST_F(GlobPatternTest, Empty) { Expected Pat1 = GlobPattern::create(""); EXPECT_TRUE((bool)Pat1); EXPECT_TRUE(Pat1->match("")); EXPECT_FALSE(Pat1->match("a")); +} + +TEST_F(GlobPatternTest, Glob) { + Expected Pat1 = GlobPattern::create("ab*c*def"); + EXPECT_TRUE((bool)Pat1); + EXPECT_TRUE(Pat1->match("abcdef")); + EXPECT_TRUE(Pat1->match("abxcxdef")); + EXPECT_FALSE(Pat1->match("")); + EXPECT_FALSE(Pat1->match("xabcdef")); + EXPECT_FALSE(Pat1->match("abcdefx")); +} + +TEST_F(GlobPatternTest, Wildcard) { + Expected Pat1 = GlobPattern::create("a??c"); + EXPECT_TRUE((bool)Pat1); + EXPECT_TRUE(Pat1->match("axxc")); + EXPECT_FALSE(Pat1->match("axxx")); + EXPECT_FALSE(Pat1->match("")); +} - Expected Pat2 = GlobPattern::create("ab*c*def"); +TEST_F(GlobPatternTest, Escape) { + Expected Pat1 = GlobPattern::create("\\*"); + EXPECT_TRUE((bool)Pat1); + EXPECT_TRUE(Pat1->match("*")); + EXPECT_FALSE(Pat1->match("\\*")); + EXPECT_FALSE(Pat1->match("a")); + + Expected Pat2 = GlobPattern::create("a?\\?c"); EXPECT_TRUE((bool)Pat2); - EXPECT_TRUE(Pat2->match("abcdef")); - EXPECT_TRUE(Pat2->match("abxcxdef")); + EXPECT_TRUE(Pat2->match("ax?c")); + 
EXPECT_FALSE(Pat2->match("axxc")); EXPECT_FALSE(Pat2->match("")); - EXPECT_FALSE(Pat2->match("xabcdef")); - EXPECT_FALSE(Pat2->match("abcdefx")); - - Expected Pat3 = GlobPattern::create("a??c"); - EXPECT_TRUE((bool)Pat3); - EXPECT_TRUE(Pat3->match("axxc")); - EXPECT_FALSE(Pat3->match("axxx")); - EXPECT_FALSE(Pat3->match("")); - - Expected Pat4 = GlobPattern::create("[abc-fy-z]"); - EXPECT_TRUE((bool)Pat4); - EXPECT_TRUE(Pat4->match("a")); - EXPECT_TRUE(Pat4->match("b")); - EXPECT_TRUE(Pat4->match("c")); - EXPECT_TRUE(Pat4->match("d")); - EXPECT_TRUE(Pat4->match("e")); - EXPECT_TRUE(Pat4->match("f")); - EXPECT_TRUE(Pat4->match("y")); - EXPECT_TRUE(Pat4->match("z")); - EXPECT_FALSE(Pat4->match("g")); - EXPECT_FALSE(Pat4->match("")); - - Expected Pat5 = GlobPattern::create("[^abc-fy-z]"); - EXPECT_TRUE((bool)Pat5); - EXPECT_TRUE(Pat5->match("g")); - EXPECT_FALSE(Pat5->match("a")); - EXPECT_FALSE(Pat5->match("b")); - EXPECT_FALSE(Pat5->match("c")); - EXPECT_FALSE(Pat5->match("d")); - EXPECT_FALSE(Pat5->match("e")); - EXPECT_FALSE(Pat5->match("f")); - EXPECT_FALSE(Pat5->match("y")); - EXPECT_FALSE(Pat5->match("z")); - EXPECT_FALSE(Pat5->match("")); +} + +TEST_F(GlobPatternTest, BasicCharacterClass) { + Expected Pat1 = GlobPattern::create("[abc-fy-z]"); + EXPECT_TRUE((bool)Pat1); + EXPECT_TRUE(Pat1->match("a")); + EXPECT_TRUE(Pat1->match("b")); + EXPECT_TRUE(Pat1->match("c")); + EXPECT_TRUE(Pat1->match("d")); + EXPECT_TRUE(Pat1->match("e")); + EXPECT_TRUE(Pat1->match("f")); + EXPECT_TRUE(Pat1->match("y")); + EXPECT_TRUE(Pat1->match("z")); + EXPECT_FALSE(Pat1->match("g")); + EXPECT_FALSE(Pat1->match("")); +} + +TEST_F(GlobPatternTest, NegatedCharacterClass) { + Expected Pat1 = GlobPattern::create("[^abc-fy-z]"); + EXPECT_TRUE((bool)Pat1); + EXPECT_TRUE(Pat1->match("g")); + EXPECT_FALSE(Pat1->match("a")); + EXPECT_FALSE(Pat1->match("b")); + EXPECT_FALSE(Pat1->match("c")); + EXPECT_FALSE(Pat1->match("d")); + EXPECT_FALSE(Pat1->match("e")); + EXPECT_FALSE(Pat1->match("f")); 
+ EXPECT_FALSE(Pat1->match("y")); + EXPECT_FALSE(Pat1->match("z")); + EXPECT_FALSE(Pat1->match("")); + + Expected Pat2 = GlobPattern::create("[!abc-fy-z]"); + EXPECT_TRUE((bool)Pat2); + EXPECT_TRUE(Pat2->match("g")); + EXPECT_FALSE(Pat2->match("a")); + EXPECT_FALSE(Pat2->match("b")); + EXPECT_FALSE(Pat2->match("c")); + EXPECT_FALSE(Pat2->match("d")); + EXPECT_FALSE(Pat2->match("e")); + EXPECT_FALSE(Pat2->match("f")); + EXPECT_FALSE(Pat2->match("y")); + EXPECT_FALSE(Pat2->match("z")); + EXPECT_FALSE(Pat2->match("")); +} + +TEST_F(GlobPatternTest, BracketFrontOfCharacterClass) { + Expected Pat1 = GlobPattern::create("[]a]x"); + EXPECT_TRUE((bool)Pat1); + EXPECT_TRUE(Pat1->match("]x")); + EXPECT_TRUE(Pat1->match("ax")); + EXPECT_FALSE(Pat1->match("a]x")); + EXPECT_FALSE(Pat1->match("")); +} + +TEST_F(GlobPatternTest, SpecialCharsInCharacterClass) { + Expected Pat1 = GlobPattern::create("[*?^]"); + EXPECT_TRUE((bool)Pat1); + EXPECT_TRUE(Pat1->match("*")); + EXPECT_TRUE(Pat1->match("?")); + EXPECT_TRUE(Pat1->match("^")); + EXPECT_FALSE(Pat1->match("*?^")); + EXPECT_FALSE(Pat1->match("")); } TEST_F(GlobPatternTest, Invalid) { Expected Pat1 = GlobPattern::create("["); EXPECT_FALSE((bool)Pat1); handleAllErrors(Pat1.takeError(), [&](ErrorInfoBase &EIB) {}); + + Expected Pat2 = GlobPattern::create("[]"); + EXPECT_FALSE((bool)Pat2); + handleAllErrors(Pat2.takeError(), [&](ErrorInfoBase &EIB) {}); } TEST_F(GlobPatternTest, ExtSym) { -- 2.40.0