[Support] Add StringRef::consumeInteger.

author Zachary Turner <zturner@google.com>

Thu, 22 Sep 2016 15:05:19 +0000 (15:05 +0000)

committer Zachary Turner <zturner@google.com>

Thu, 22 Sep 2016 15:05:19 +0000 (15:05 +0000)
author Zachary Turner <zturner@google.com>
Thu, 22 Sep 2016 15:05:19 +0000 (15:05 +0000)
committer Zachary Turner <zturner@google.com>
Thu, 22 Sep 2016 15:05:19 +0000 (15:05 +0000)
diff --git a/include/llvm/ADT/StringRef.h b/include/llvm/ADT/StringRef.h

index ff423e8a34090c6c5f400e1561ab29436d1edc00..94f0538e4458e1f618708fe29da4fa5229201f4c 100644 (file)
--- a/include/llvm/ADT/StringRef.h
+++ b/include/llvm/ADT/StringRef.h
@@ -32,6 +32,10 @@ namespace llvm {
  
    bool getAsSignedInteger(StringRef Str, unsigned Radix, long long &Result);
  
+  bool consumeUnsignedInteger(StringRef &Str, unsigned Radix,
+                              unsigned long long &Result);
+  bool consumeSignedInteger(StringRef &Str, unsigned Radix, long long &Result);
+
    /// StringRef - Represent a constant reference to a string, i.e. a character
    /// array and a length, which need not be null terminated.
    ///
@@ -397,6 +401,37 @@ namespace llvm {
        return false;
      }
  
+    /// Parse the current string as an integer of the specified radix.  If
+    /// \p Radix is specified as zero, this does radix autosensing using
+    /// extended C rules: 0 is octal, 0x is hex, 0b is binary.
+    ///
+    /// If the string does not begin with a number of the specified radix,
+    /// this returns true to signify the error.  The string is considered
+    /// erroneous if empty or if it overflows T.  On success (false is
+    /// returned), \p Result holds the value and the portion of the string
+    /// representing it is removed from the beginning of the string.
+    template <typename T>
+    typename std::enable_if<std::numeric_limits<T>::is_signed, bool>::type
+    consumeInteger(unsigned Radix, T &Result) {
+      long long LLVal;
+      if (consumeSignedInteger(*this, Radix, LLVal) ||
+          static_cast<long long>(static_cast<T>(LLVal)) != LLVal)
+        return true;
+      Result = LLVal;
+      return false;
+    }
+
+    template <typename T>
+    typename std::enable_if<!std::numeric_limits<T>::is_signed, bool>::type
+    consumeInteger(unsigned Radix, T &Result) {
+      unsigned long long ULLVal;
+      if (consumeUnsignedInteger(*this, Radix, ULLVal) ||
+          static_cast<long long>(static_cast<T>(ULLVal)) != ULLVal)
+        return true;
+      Result = ULLVal;
+      return false;
+    }
+
      /// Parse the current string as an integer of the specified \p Radix, or of
      /// an autosensed radix if the \p Radix given is 0.  The current value in
      /// \p Result is discarded, and the storage is changed to be wide enough to
diff --git a/lib/Support/StringRef.cpp b/lib/Support/StringRef.cpp

index 8a9da5edca84cca35ad6691e3e028d71bf7ba352..7503fac240ad084458e78a4a5bf7fd576d9800eb 100644 (file)
--- a/lib/Support/StringRef.cpp
+++ b/lib/Support/StringRef.cpp
@@ -366,17 +366,16 @@ static unsigned GetAutoSenseRadix(StringRef &Str) {
      return 8;
    }
  
-  if (Str.startswith("0"))
+  if (Str[0] == '0' && Str.size() > 1 && ascii_isdigit(Str[1])) {
+    Str = Str.substr(1);
      return 8;
-  
+  }
+
    return 10;
  }
  
-
-/// GetAsUnsignedInteger - Workhorse method that converts a integer character
-/// sequence of radix up to 36 to an unsigned long long value.
-bool llvm::getAsUnsignedInteger(StringRef Str, unsigned Radix,
-                                unsigned long long &Result) {
+bool llvm::consumeUnsignedInteger(StringRef &Str, unsigned Radix,
+                                  unsigned long long &Result) {
    // Autosense radix if not specified.
    if (Radix == 0)
      Radix = GetAutoSenseRadix(Str);
@@ -385,44 +384,51 @@ bool llvm::getAsUnsignedInteger(StringRef Str, unsigned Radix,
    if (Str.empty()) return true;
  
    // Parse all the bytes of the string given this radix.  Watch for overflow.
+  StringRef Str2 = Str;
    Result = 0;
-  while (!Str.empty()) {
+  while (!Str2.empty()) {
      unsigned CharVal;
-    if (Str[0] >= '0' && Str[0] <= '9')
-      CharVal = Str[0]-'0';
-    else if (Str[0] >= 'a' && Str[0] <= 'z')
-      CharVal = Str[0]-'a'+10;
-    else if (Str[0] >= 'A' && Str[0] <= 'Z')
-      CharVal = Str[0]-'A'+10;
+    if (Str2[0] >= '0' && Str2[0] <= '9')
+      CharVal = Str2[0] - '0';
+    else if (Str2[0] >= 'a' && Str2[0] <= 'z')
+      CharVal = Str2[0] - 'a' + 10;
+    else if (Str2[0] >= 'A' && Str2[0] <= 'Z')
+      CharVal = Str2[0] - 'A' + 10;
      else
-      return true;
+      break;
  
-    // If the parsed value is larger than the integer radix, the string is
-    // invalid.
+    // If the digit's value is not less than the radix, it is not a valid
+    // digit for this radix, so we cannot consume any more characters.
      if (CharVal >= Radix)
-      return true;
+      break;
  
      // Add in this character.
      unsigned long long PrevResult = Result;
-    Result = Result*Radix+CharVal;
+    Result = Result * Radix + CharVal;
  
      // Check for overflow by shifting back and seeing if bits were lost.
-    if (Result/Radix < PrevResult)
+    if (Result / Radix < PrevResult)
        return true;
  
-    Str = Str.substr(1);
+    Str2 = Str2.substr(1);
    }
  
+  // We consider the operation a failure if no characters were consumed
+  // successfully.
+  if (Str.size() == Str2.size())
+    return true;
+
+  Str = Str2;
    return false;
  }
  
-bool llvm::getAsSignedInteger(StringRef Str, unsigned Radix,
-                              long long &Result) {
+bool llvm::consumeSignedInteger(StringRef &Str, unsigned Radix,
+                                long long &Result) {
    unsigned long long ULLVal;
  
    // Handle positive strings first.
    if (Str.empty() || Str.front() != '-') {
-    if (getAsUnsignedInteger(Str, Radix, ULLVal) ||
+    if (consumeUnsignedInteger(Str, Radix, ULLVal) ||
          // Check for value so large it overflows a signed value.
          (long long)ULLVal < 0)
        return true;
@@ -431,17 +437,41 @@ bool llvm::getAsSignedInteger(StringRef Str, unsigned Radix,
    }
  
    // Get the positive part of the value.
-  if (getAsUnsignedInteger(Str.substr(1), Radix, ULLVal) ||
+  StringRef Str2 = Str.drop_front(1);
+  if (consumeUnsignedInteger(Str2, Radix, ULLVal) ||
        // Reject values so large they'd overflow as negative signed, but allow
        // "-0".  This negates the unsigned so that the negative isn't undefined
        // on signed overflow.
        (long long)-ULLVal > 0)
      return true;
  
+  Str = Str2;
    Result = -ULLVal;
    return false;
  }
  
+/// getAsUnsignedInteger - Convert an integer character sequence of radix up
+/// to 36 to an unsigned long long; fails unless the whole string is consumed.
+bool llvm::getAsUnsignedInteger(StringRef Str, unsigned Radix,
+                                unsigned long long &Result) {
+  if (consumeUnsignedInteger(Str, Radix, Result))
+    return true;
+
+  // For getAsUnsignedInteger, we require the whole string to be consumed or
+  // else we consider it a failure.
+  return !Str.empty();
+}
+
+bool llvm::getAsSignedInteger(StringRef Str, unsigned Radix,
+                              long long &Result) {
+  if (consumeSignedInteger(Str, Radix, Result))
+    return true;
+
+  // For getAsSignedInteger, we require the whole string to be consumed or else
+  // we consider it a failure.
+  return !Str.empty();
+}
+
  bool StringRef::getAsInteger(unsigned Radix, APInt &Result) const {
    StringRef Str = *this;
  
diff --git a/unittests/ADT/StringRefTest.cpp b/unittests/ADT/StringRefTest.cpp

index 97ddaf6adcc3a0ce9bd49a4f96cbae33922546fa..4249f231481d91ff2a606b9bb362b668cb7227b1 100644 (file)
--- a/unittests/ADT/StringRefTest.cpp
+++ b/unittests/ADT/StringRefTest.cpp
@@ -590,6 +590,183 @@ TEST(StringRefTest, getAsUnsignedIntegerBadStrings) {
    }
  }
  
+struct ConsumeUnsignedPair {
+  const char *Str;
+  uint64_t Expected;
+  const char *Leftover;
+} ConsumeUnsigned[] = {
+    {"0", 0, ""},
+    {"255", 255, ""},
+    {"256", 256, ""},
+    {"65535", 65535, ""},
+    {"65536", 65536, ""},
+    {"4294967295", 4294967295ULL, ""},
+    {"4294967296", 4294967296ULL, ""},
+    {"255A376", 255, "A376"},
+    {"18446744073709551615", 18446744073709551615ULL, ""},
+    {"18446744073709551615ABC", 18446744073709551615ULL, "ABC"},
+    {"042", 34, ""},
+    {"0x42", 66, ""},
+    {"0x42-0x34", 66, "-0x34"},
+    {"0b101010", 42, ""},
+    {"0429F", 042, "9F"},            // Auto-sensed octal radix, invalid digit
+    {"0x42G12", 0x42, "G12"},        // Auto-sensed hex radix, invalid digit
+    {"0b10101020101", 42, "20101"}}; // Auto-sensed binary radix, invalid digit.
+
+struct ConsumeSignedPair {
+  const char *Str;
+  int64_t Expected;
+  const char *Leftover;
+} ConsumeSigned[] = {
+    {"0", 0, ""},
+    {"-0", 0, ""},
+    {"0-1", 0, "-1"},
+    {"-0-1", 0, "-1"},
+    {"127", 127, ""},
+    {"128", 128, ""},
+    {"127-1", 127, "-1"},
+    {"128-1", 128, "-1"},
+    {"-128", -128, ""},
+    {"-129", -129, ""},
+    {"-128-1", -128, "-1"},
+    {"-129-1", -129, "-1"},
+    {"32767", 32767, ""},
+    {"32768", 32768, ""},
+    {"32767-1", 32767, "-1"},
+    {"32768-1", 32768, "-1"},
+    {"-32768", -32768, ""},
+    {"-32769", -32769, ""},
+    {"-32768-1", -32768, "-1"},
+    {"-32769-1", -32769, "-1"},
+    {"2147483647", 2147483647LL, ""},
+    {"2147483648", 2147483648LL, ""},
+    {"2147483647-1", 2147483647LL, "-1"},
+    {"2147483648-1", 2147483648LL, "-1"},
+    {"-2147483648", -2147483648LL, ""},
+    {"-2147483649", -2147483649LL, ""},
+    {"-2147483648-1", -2147483648LL, "-1"},
+    {"-2147483649-1", -2147483649LL, "-1"},
+    {"-9223372036854775808", -(9223372036854775807LL) - 1, ""},
+    {"-9223372036854775808-1", -(9223372036854775807LL) - 1, "-1"},
+    {"042", 34, ""},
+    {"042-1", 34, "-1"},
+    {"0x42", 66, ""},
+    {"0x42-1", 66, "-1"},
+    {"0b101010", 42, ""},
+    {"0b101010-1", 42, "-1"},
+    {"-042", -34, ""},
+    {"-042-1", -34, "-1"},
+    {"-0x42", -66, ""},
+    {"-0x42-1", -66, "-1"},
+    {"-0b101010", -42, ""},
+    {"-0b101010-1", -42, "-1"}};
+
+TEST(StringRefTest, consumeIntegerUnsigned) {
+  uint8_t U8;
+  uint16_t U16;
+  uint32_t U32;
+  uint64_t U64;
+
+  for (size_t i = 0; i < array_lengthof(ConsumeUnsigned); ++i) {
+    StringRef Str = ConsumeUnsigned[i].Str;
+    bool U8Success = Str.consumeInteger(0, U8);
+    if (static_cast<uint8_t>(ConsumeUnsigned[i].Expected) ==
+        ConsumeUnsigned[i].Expected) {
+      ASSERT_FALSE(U8Success);
+      EXPECT_EQ(U8, ConsumeUnsigned[i].Expected);
+      EXPECT_EQ(Str, ConsumeUnsigned[i].Leftover);
+    } else {
+      ASSERT_TRUE(U8Success);
+    }
+
+    Str = ConsumeUnsigned[i].Str;
+    bool U16Success = Str.consumeInteger(0, U16);
+    if (static_cast<uint16_t>(ConsumeUnsigned[i].Expected) ==
+        ConsumeUnsigned[i].Expected) {
+      ASSERT_FALSE(U16Success);
+      EXPECT_EQ(U16, ConsumeUnsigned[i].Expected);
+      EXPECT_EQ(Str, ConsumeUnsigned[i].Leftover);
+    } else {
+      ASSERT_TRUE(U16Success);
+    }
+
+    Str = ConsumeUnsigned[i].Str;
+    bool U32Success = Str.consumeInteger(0, U32);
+    if (static_cast<uint32_t>(ConsumeUnsigned[i].Expected) ==
+        ConsumeUnsigned[i].Expected) {
+      ASSERT_FALSE(U32Success);
+      EXPECT_EQ(U32, ConsumeUnsigned[i].Expected);
+      EXPECT_EQ(Str, ConsumeUnsigned[i].Leftover);
+    } else {
+      ASSERT_TRUE(U32Success);
+    }
+
+    Str = ConsumeUnsigned[i].Str;
+    bool U64Success = Str.consumeInteger(0, U64);
+    if (static_cast<uint64_t>(ConsumeUnsigned[i].Expected) ==
+        ConsumeUnsigned[i].Expected) {
+      ASSERT_FALSE(U64Success);
+      EXPECT_EQ(U64, ConsumeUnsigned[i].Expected);
+      EXPECT_EQ(Str, ConsumeUnsigned[i].Leftover);
+    } else {
+      ASSERT_TRUE(U64Success);
+    }
+  }
+}
+
+TEST(StringRefTest, consumeIntegerSigned) {
+  int8_t S8;
+  int16_t S16;
+  int32_t S32;
+  int64_t S64;
+
+  for (size_t i = 0; i < array_lengthof(ConsumeSigned); ++i) {
+    StringRef Str = ConsumeSigned[i].Str;
+    bool S8Success = Str.consumeInteger(0, S8);
+    if (static_cast<int8_t>(ConsumeSigned[i].Expected) ==
+        ConsumeSigned[i].Expected) {
+      ASSERT_FALSE(S8Success);
+      EXPECT_EQ(S8, ConsumeSigned[i].Expected);
+      EXPECT_EQ(Str, ConsumeSigned[i].Leftover);
+    } else {
+      ASSERT_TRUE(S8Success);
+    }
+
+    Str = ConsumeSigned[i].Str;
+    bool S16Success = Str.consumeInteger(0, S16);
+    if (static_cast<int16_t>(ConsumeSigned[i].Expected) ==
+        ConsumeSigned[i].Expected) {
+      ASSERT_FALSE(S16Success);
+      EXPECT_EQ(S16, ConsumeSigned[i].Expected);
+      EXPECT_EQ(Str, ConsumeSigned[i].Leftover);
+    } else {
+      ASSERT_TRUE(S16Success);
+    }
+
+    Str = ConsumeSigned[i].Str;
+    bool S32Success = Str.consumeInteger(0, S32);
+    if (static_cast<int32_t>(ConsumeSigned[i].Expected) ==
+        ConsumeSigned[i].Expected) {
+      ASSERT_FALSE(S32Success);
+      EXPECT_EQ(S32, ConsumeSigned[i].Expected);
+      EXPECT_EQ(Str, ConsumeSigned[i].Leftover);
+    } else {
+      ASSERT_TRUE(S32Success);
+    }
+
+    Str = ConsumeSigned[i].Str;
+    bool S64Success = Str.consumeInteger(0, S64);
+    if (static_cast<int64_t>(ConsumeSigned[i].Expected) ==
+        ConsumeSigned[i].Expected) {
+      ASSERT_FALSE(S64Success);
+      EXPECT_EQ(S64, ConsumeSigned[i].Expected);
+      EXPECT_EQ(Str, ConsumeSigned[i].Leftover);
+    } else {
+      ASSERT_TRUE(S64Success);
+    }
+  }
+}
+
  static const char *join_input[] = { "a", "b", "c" };
  static const char join_result1[] = "a";
  static const char join_result2[] = "a:b:c";
author	Zachary Turner <zturner@google.com>
	Thu, 22 Sep 2016 15:05:19 +0000 (15:05 +0000)
committer	Zachary Turner <zturner@google.com>
	Thu, 22 Sep 2016 15:05:19 +0000 (15:05 +0000)
include/llvm/ADT/StringRef.h		patch \| blob \| history
lib/Support/StringRef.cpp		patch \| blob \| history
unittests/ADT/StringRefTest.cpp		patch \| blob \| history