granicus.if.org Git - llvm/commitdiff
[support] GlobPattern: add support for `\` and `[!...]`, and allow `]` in more places
author Jordan Rupprecht <rupprecht@google.com>
Wed, 16 Oct 2019 22:31:16 +0000 (22:31 +0000)
committer Jordan Rupprecht <rupprecht@google.com>
Wed, 16 Oct 2019 22:31:16 +0000 (22:31 +0000)
Summary: Update GlobPattern in libSupport to handle a few more cases. It does not fully match the `fnmatch` used by GNU objcopy since named character classes (e.g. `[[:digit:]]`) are not supported, but this should support most existing use cases (mostly just `*` is what's used anyway).

This will be used to implement the `--wildcard` flag in llvm-objcopy to be more compatible with GNU objcopy.

This is split off of D66613 to land the libSupport changes separately. The llvm-objcopy part will land soon.

Reviewers: jhenderson, MaskRay, evgeny777, espindola, alexshap

Reviewed By: MaskRay

Subscribers: nickdesaulniers, emaste, arichardson, hiraditya, jakehehrlich, abrachet, seiya, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D66613

undo objcopy changes to make this libsupport only

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@375051 91177308-0d34-0410-b5e6-96231b3b80d8

include/llvm/Support/GlobPattern.h
lib/Support/GlobPattern.cpp
unittests/Support/GlobPatternTest.cpp

index 66a4cd94c12ada304d7e9d19ce65737ca08abbce..0098ac65fd308c873ab518401c91ee41d83feadb 100644 (file)
@@ -21,7 +21,7 @@
 #include <vector>
 
 // This class represents a glob pattern. Supported metacharacters
-// are "*", "?", "[<chars>]" and "[^<chars>]".
+// are "*", "?", "\", "[<chars>]", "[^<chars>]", and "[!<chars>]".
 namespace llvm {
 class BitVector;
 template <typename T> class ArrayRef;
index 6011be86d77fba053e9b27672e13a3eb1158dd88..8dae6941ec7709cce1843fa0a6b4734efda19f03 100644 (file)
@@ -19,7 +19,7 @@
 using namespace llvm;
 
 static bool hasWildcard(StringRef S) {
-  return S.find_first_of("?*[") != StringRef::npos;
+  return S.find_first_of("?*[\\") != StringRef::npos;
 }
 
 // Expands character ranges and returns a bitmap.
@@ -60,8 +60,9 @@ static Expected<BitVector> expand(StringRef S, StringRef Original) {
 }
 
 // This is a scanner for the glob pattern.
-// A glob pattern token is one of "*", "?", "[<chars>]", "[^<chars>]"
-// (which is a negative form of "[<chars>]"), or a non-meta character.
+// A glob pattern token is one of "*", "?", "\", "[<chars>]", "[^<chars>]"
+// (which is a negative form of "[<chars>]"), "[!<chars>]" (which is
+// equivalent to "[^<chars>]"), or a non-meta character.
 // This function returns the first token in S.
 static Expected<BitVector> scan(StringRef &S, StringRef Original) {
   switch (S[0]) {
@@ -74,14 +75,16 @@ static Expected<BitVector> scan(StringRef &S, StringRef Original) {
     S = S.substr(1);
     return BitVector(256, true);
   case '[': {
-    size_t End = S.find(']', 1);
+    // ']' is allowed as the first character of a character class. '[]' is
+    // invalid. So, just skip the first character.
+    size_t End = S.find(']', 2);
     if (End == StringRef::npos)
       return make_error<StringError>("invalid glob pattern: " + Original,
                                      errc::invalid_argument);
 
     StringRef Chars = S.substr(1, End - 1);
     S = S.substr(End + 1);
-    if (Chars.startswith("^")) {
+    if (Chars.startswith("^") || Chars.startswith("!")) {
       Expected<BitVector> BV = expand(Chars.substr(1), Original);
       if (!BV)
         return BV.takeError();
@@ -89,6 +92,11 @@ static Expected<BitVector> scan(StringRef &S, StringRef Original) {
     }
     return expand(Chars, Original);
   }
+  case '\\':
+    // Eat this character and fall through below to treat it like a non-meta
+    // character.
+    S = S.substr(1);
+    LLVM_FALLTHROUGH;
   default:
     BitVector BV(256, false);
     BV[(uint8_t)S[0]] = true;
@@ -107,8 +115,9 @@ Expected<GlobPattern> GlobPattern::create(StringRef S) {
     return Pat;
   }
 
-  // S is something like "foo*". We can use startswith().
-  if (S.endswith("*") && !hasWildcard(S.drop_back())) {
+  // S is something like "foo*", and the "*" is not escaped. We can use
+  // startswith().
+  if (S.endswith("*") && !S.endswith("\\*") && !hasWildcard(S.drop_back())) {
     Pat.Prefix = S.drop_back();
     return Pat;
   }
index 113b20e047eae67113c605e3a4ebcf1e6b16383a..17d60b2b85087685ed96901ef889f2b1048db5f5 100644 (file)
@@ -14,57 +14,115 @@ namespace {
 
 class GlobPatternTest : public ::testing::Test {};
 
-TEST_F(GlobPatternTest, Basics) {
+TEST_F(GlobPatternTest, Empty) {
   Expected<GlobPattern> Pat1 = GlobPattern::create("");
   EXPECT_TRUE((bool)Pat1);
   EXPECT_TRUE(Pat1->match(""));
   EXPECT_FALSE(Pat1->match("a"));
+}
+
+TEST_F(GlobPatternTest, Glob) {
+  Expected<GlobPattern> Pat1 = GlobPattern::create("ab*c*def");
+  EXPECT_TRUE((bool)Pat1);
+  EXPECT_TRUE(Pat1->match("abcdef"));
+  EXPECT_TRUE(Pat1->match("abxcxdef"));
+  EXPECT_FALSE(Pat1->match(""));
+  EXPECT_FALSE(Pat1->match("xabcdef"));
+  EXPECT_FALSE(Pat1->match("abcdefx"));
+}
+
+TEST_F(GlobPatternTest, Wildcard) {
+  Expected<GlobPattern> Pat1 = GlobPattern::create("a??c");
+  EXPECT_TRUE((bool)Pat1);
+  EXPECT_TRUE(Pat1->match("axxc"));
+  EXPECT_FALSE(Pat1->match("axxx"));
+  EXPECT_FALSE(Pat1->match(""));
+}
 
-  Expected<GlobPattern> Pat2 = GlobPattern::create("ab*c*def");
+TEST_F(GlobPatternTest, Escape) {
+  Expected<GlobPattern> Pat1 = GlobPattern::create("\\*");
+  EXPECT_TRUE((bool)Pat1);
+  EXPECT_TRUE(Pat1->match("*"));
+  EXPECT_FALSE(Pat1->match("\\*"));
+  EXPECT_FALSE(Pat1->match("a"));
+
+  Expected<GlobPattern> Pat2 = GlobPattern::create("a?\\?c");
   EXPECT_TRUE((bool)Pat2);
-  EXPECT_TRUE(Pat2->match("abcdef"));
-  EXPECT_TRUE(Pat2->match("abxcxdef"));
+  EXPECT_TRUE(Pat2->match("ax?c"));
+  EXPECT_FALSE(Pat2->match("axxc"));
   EXPECT_FALSE(Pat2->match(""));
-  EXPECT_FALSE(Pat2->match("xabcdef"));
-  EXPECT_FALSE(Pat2->match("abcdefx"));
-
-  Expected<GlobPattern> Pat3 = GlobPattern::create("a??c");
-  EXPECT_TRUE((bool)Pat3);
-  EXPECT_TRUE(Pat3->match("axxc"));
-  EXPECT_FALSE(Pat3->match("axxx"));
-  EXPECT_FALSE(Pat3->match(""));
-
-  Expected<GlobPattern> Pat4 = GlobPattern::create("[abc-fy-z]");
-  EXPECT_TRUE((bool)Pat4);
-  EXPECT_TRUE(Pat4->match("a"));
-  EXPECT_TRUE(Pat4->match("b"));
-  EXPECT_TRUE(Pat4->match("c"));
-  EXPECT_TRUE(Pat4->match("d"));
-  EXPECT_TRUE(Pat4->match("e"));
-  EXPECT_TRUE(Pat4->match("f"));
-  EXPECT_TRUE(Pat4->match("y"));
-  EXPECT_TRUE(Pat4->match("z"));
-  EXPECT_FALSE(Pat4->match("g"));
-  EXPECT_FALSE(Pat4->match(""));
-
-  Expected<GlobPattern> Pat5 = GlobPattern::create("[^abc-fy-z]");
-  EXPECT_TRUE((bool)Pat5);
-  EXPECT_TRUE(Pat5->match("g"));
-  EXPECT_FALSE(Pat5->match("a"));
-  EXPECT_FALSE(Pat5->match("b"));
-  EXPECT_FALSE(Pat5->match("c"));
-  EXPECT_FALSE(Pat5->match("d"));
-  EXPECT_FALSE(Pat5->match("e"));
-  EXPECT_FALSE(Pat5->match("f"));
-  EXPECT_FALSE(Pat5->match("y"));
-  EXPECT_FALSE(Pat5->match("z"));
-  EXPECT_FALSE(Pat5->match(""));
+}
+
+TEST_F(GlobPatternTest, BasicCharacterClass) {
+  Expected<GlobPattern> Pat1 = GlobPattern::create("[abc-fy-z]");
+  EXPECT_TRUE((bool)Pat1);
+  EXPECT_TRUE(Pat1->match("a"));
+  EXPECT_TRUE(Pat1->match("b"));
+  EXPECT_TRUE(Pat1->match("c"));
+  EXPECT_TRUE(Pat1->match("d"));
+  EXPECT_TRUE(Pat1->match("e"));
+  EXPECT_TRUE(Pat1->match("f"));
+  EXPECT_TRUE(Pat1->match("y"));
+  EXPECT_TRUE(Pat1->match("z"));
+  EXPECT_FALSE(Pat1->match("g"));
+  EXPECT_FALSE(Pat1->match(""));
+}
+
+TEST_F(GlobPatternTest, NegatedCharacterClass) {
+  Expected<GlobPattern> Pat1 = GlobPattern::create("[^abc-fy-z]");
+  EXPECT_TRUE((bool)Pat1);
+  EXPECT_TRUE(Pat1->match("g"));
+  EXPECT_FALSE(Pat1->match("a"));
+  EXPECT_FALSE(Pat1->match("b"));
+  EXPECT_FALSE(Pat1->match("c"));
+  EXPECT_FALSE(Pat1->match("d"));
+  EXPECT_FALSE(Pat1->match("e"));
+  EXPECT_FALSE(Pat1->match("f"));
+  EXPECT_FALSE(Pat1->match("y"));
+  EXPECT_FALSE(Pat1->match("z"));
+  EXPECT_FALSE(Pat1->match(""));
+
+  Expected<GlobPattern> Pat2 = GlobPattern::create("[!abc-fy-z]");
+  EXPECT_TRUE((bool)Pat2);
+  EXPECT_TRUE(Pat2->match("g"));
+  EXPECT_FALSE(Pat2->match("a"));
+  EXPECT_FALSE(Pat2->match("b"));
+  EXPECT_FALSE(Pat2->match("c"));
+  EXPECT_FALSE(Pat2->match("d"));
+  EXPECT_FALSE(Pat2->match("e"));
+  EXPECT_FALSE(Pat2->match("f"));
+  EXPECT_FALSE(Pat2->match("y"));
+  EXPECT_FALSE(Pat2->match("z"));
+  EXPECT_FALSE(Pat2->match(""));
+}
+
+TEST_F(GlobPatternTest, BracketFrontOfCharacterClass) {
+  Expected<GlobPattern> Pat1 = GlobPattern::create("[]a]x");
+  EXPECT_TRUE((bool)Pat1);
+  EXPECT_TRUE(Pat1->match("]x"));
+  EXPECT_TRUE(Pat1->match("ax"));
+  EXPECT_FALSE(Pat1->match("a]x"));
+  EXPECT_FALSE(Pat1->match(""));
+}
+
+TEST_F(GlobPatternTest, SpecialCharsInCharacterClass) {
+  Expected<GlobPattern> Pat1 = GlobPattern::create("[*?^]");
+  EXPECT_TRUE((bool)Pat1);
+  EXPECT_TRUE(Pat1->match("*"));
+  EXPECT_TRUE(Pat1->match("?"));
+  EXPECT_TRUE(Pat1->match("^"));
+  EXPECT_FALSE(Pat1->match("*?^"));
+  EXPECT_FALSE(Pat1->match(""));
 }
 
 TEST_F(GlobPatternTest, Invalid) {
   Expected<GlobPattern> Pat1 = GlobPattern::create("[");
   EXPECT_FALSE((bool)Pat1);
   handleAllErrors(Pat1.takeError(), [&](ErrorInfoBase &EIB) {});
+
+  Expected<GlobPattern> Pat2 = GlobPattern::create("[]");
+  EXPECT_FALSE((bool)Pat2);
+  handleAllErrors(Pat2.takeError(), [&](ErrorInfoBase &EIB) {});
 }
 
 TEST_F(GlobPatternTest, ExtSym) {