From 5417c0baab47a11b8b487c92738b62b6d42fe4d4 Mon Sep 17 00:00:00 2001 From: Ulya Trofimovich Date: Tue, 1 Aug 2017 16:41:58 +0100 Subject: [PATCH] Added fuzzers (contributed by Sergei Trofimovich). --- re2c/fuzz/regex_tdfa/__mk.sh | 4 + re2c/fuzz/regex_tdfa/check.hs | 171 +++++++++++++++++++++++++++ re2c/fuzz/tdfa0_tdfa1/__mk.sh | 4 + re2c/fuzz/tdfa0_tdfa1/check.hs | 124 +++++++++++++++++++ re2c/fuzz/tdfa0_tdfa1_posix/__mk.sh | 4 + re2c/fuzz/tdfa0_tdfa1_posix/check.hs | 98 +++++++++++++++ 6 files changed, 405 insertions(+) create mode 100755 re2c/fuzz/regex_tdfa/__mk.sh create mode 100644 re2c/fuzz/regex_tdfa/check.hs create mode 100755 re2c/fuzz/tdfa0_tdfa1/__mk.sh create mode 100644 re2c/fuzz/tdfa0_tdfa1/check.hs create mode 100755 re2c/fuzz/tdfa0_tdfa1_posix/__mk.sh create mode 100644 re2c/fuzz/tdfa0_tdfa1_posix/check.hs diff --git a/re2c/fuzz/regex_tdfa/__mk.sh b/re2c/fuzz/regex_tdfa/__mk.sh new file mode 100755 index 00000000..1025a551 --- /dev/null +++ b/re2c/fuzz/regex_tdfa/__mk.sh @@ -0,0 +1,4 @@ + +[ -x ../re2c ] || { echo "*** no re2c found ***"; exit 1; } + +ghc check.hs -Wall -fforce-recomp -O2 diff --git a/re2c/fuzz/regex_tdfa/check.hs b/re2c/fuzz/regex_tdfa/check.hs new file mode 100644 index 00000000..97480c9b --- /dev/null +++ b/re2c/fuzz/regex_tdfa/check.hs @@ -0,0 +1,171 @@ + +import qualified Test.QuickCheck as Q +import qualified Test.QuickCheck.Monadic as QM +import qualified System.Process as SP +import qualified System.Exit as SE +import qualified Data.ByteString.Char8 as BS +import Data.Char (ord) +import qualified Text.Regex.TDFA as X +import Control.Monad (when) + + +data E = A | B | C + | Empty + | NA | NB | NC + | Alt E E + | Cat E E + | Star E + | Plus E + | Mayb E + | FromTo Int Int E + | From Int E + + +instance Show E where + show x = case x of + A -> "[a]" + B -> "[b]" + C -> "[c]" + Empty -> "(\"\")" + NA -> "[^a]" + NB -> "[^b]" + NC -> "[^c]" +-- Alt l r -> show l ++ "|" ++ show r + Alt l r -> "(" ++ show l ++ "|" ++ show r ++ ")" +-- Cat l r -> show l ++ show r + Cat l r -> "(" ++ show l ++ show r ++ ")" + Star e -> "(" ++ show e ++ ")*" + Plus e -> "(" ++ show e ++ ")+" + Mayb e -> "(" ++ show e ++ ")?" + FromTo n m e -> "(" ++ show e ++ "){" ++ show n ++ "," ++ show m ++ "}" + From n e -> "(" ++ show e ++ "){" ++ show n ++ ",}" + + +show_posix :: E -> String +show_posix x = case x of + A -> "[a]" + B -> "[b]" + C -> "[c]" + Empty -> "()" + NA -> "[^a]" + NB -> "[^b]" + NC -> "[^c]" +-- Alt l r -> show_posix l ++ "|" ++ show_posix r + Alt l r -> "(" ++ show_posix l ++ "|" ++ show_posix r ++ ")" +-- Cat l r -> show_posix l ++ show_posix r + Cat l r -> "(" ++ show_posix l ++ show_posix r ++ ")" + Star e -> "(" ++ show_posix e ++ ")*" + Plus e -> "(" ++ show_posix e ++ ")+" + Mayb e -> "(" ++ show_posix e ++ ")?" + FromTo n m e -> "(" ++ show_posix e ++ "){" ++ show n ++ "," ++ show m ++ "}" + From n e -> "(" ++ show_posix e ++ "){" ++ show n ++ ",}" + + +instance Q.Arbitrary E where + arbitrary = do + d <- Q.choose (2,4) :: Q.Gen Int + arbitrary_d d + + +arbitrary_d :: (Enum a, Eq a, Num a) => a -> Q.Gen E +arbitrary_d 0 = do + Q.frequency + [ (1, pure Empty) + , (1, pure A) + , (1, pure B) + , (1, pure C) + , (1, pure NA) + , (1, pure NB) + , (1, pure NC) + ] + + +arbitrary_d d = do + n <- Q.choose (0,1) :: Q.Gen Int + m <- Q.choose (if n == 0 then 1 else n, 3) :: Q.Gen Int + Q.frequency + [ (1, pure Empty) + , (1, pure A) + , (1, pure B) + , (1, pure C) + , (1, pure NA) + , (1, pure NB) + , (1, pure NC) + , (30, Alt <$> arbitrary_d d' <*> arbitrary_d d') + , (30, Cat <$> arbitrary_d d' <*> arbitrary_d d') + , (10, Star <$> arbitrary_d d') + , (10, Plus <$> arbitrary_d d') + , (10, Mayb <$> arbitrary_d d') + , (10, FromTo n m <$> arbitrary_d d') + , (10, From n <$> arbitrary_d d') + ] + where d' = pred d + + +parse_input :: Int -> IO [(BS.ByteString, [Int], [BS.ByteString])] +parse_input ncaps = do + let step :: BS.ByteString -> BS.ByteString -> (BS.ByteString, [Int], [BS.ByteString], BS.ByteString) + step input key = + let ns'@(n1:n2:_:ns) = reverse $ BS.foldl' (\xs c -> ord c : xs) [] key + s = BS.take n2 input + ss = split ns s + rest = BS.drop n1 input + in (s, ns', ss, rest) + + go :: [BS.ByteString] -> BS.ByteString -> [(BS.ByteString, [Int], [BS.ByteString])] + go [] _ = [] + go (key:keys) input = + let (s, ns, ss, rest) = step input key + in (s, ns, ss) : go keys rest + + split :: [Int] -> BS.ByteString -> [BS.ByteString] + split [] _ = [] + split (n1:n2:ns) s = (BS.drop n1 . BS.take n2) s : split ns s + split _ _ = error "uneven number of keys" + + split_at :: Int -> BS.ByteString -> [BS.ByteString] + split_at _ s | s == BS.empty = [] + split_at n s | BS.length s < n = error "bad tail" + split_at n s = BS.take n s : split_at n (BS.drop n s) + + ncaps' = 2 * (ncaps + 1) + 3 + + input <- BS.readFile "a.c.line1.input" + keys <- split_at ncaps' <$> BS.readFile "a.c.line1.keys" + return $ go keys input + + +prop_test_re2c :: E -> Q.Property +prop_test_re2c r1 = QM.monadicIO $ do + let portable_empty = "[a]{0}" + re_file = "/*!re2c " ++ show r1 ++ "|" ++ portable_empty ++ " {} */" + re_posix = "^" ++ show_posix r1 ++ "|" ++ portable_empty + rr = X.makeRegex re_posix :: X.Regex + ncaps = length $ filter (== '(') re_posix + re2c = "../re2c" + + ok0 <- QM.run $ do + BS.writeFile "a.re" $ BS.pack re_file + SP.system $ "ulimit -t 10 && " ++ re2c + ++ " --posix-captures -Werror-undefined-control-flow -ST a.re -o a.c 2>>re2c_last_warning" + ++ " || exit 42 && gcc a.c -o a && ./a" + QM.assert $ ok0 `elem` [SE.ExitSuccess, SE.ExitFailure 42] + when (ok0 == SE.ExitSuccess) $ do + ss <- QM.run $ parse_input ncaps + mapM_ (\(s, ns, xs) -> do + let s1 = map BS.unpack xs + s2 = ((\x -> if x == [] then [] else head x) . X.match rr . BS.unpack) s + ok = s1 == s2 || (BS.filter (== '\n') s) /= BS.empty + QM.run $ when (not ok) $ do + print re_posix + print ncaps + print $ BS.unpack s + print ns + print s1 + print s2 + QM.assert ok + ) ss + + +main :: IO () +main = Q.quickCheckWith Q.stdArgs { Q.maxSuccess = 1000000 } prop_test_re2c diff --git a/re2c/fuzz/tdfa0_tdfa1/__mk.sh b/re2c/fuzz/tdfa0_tdfa1/__mk.sh new file mode 100755 index 00000000..1025a551 --- /dev/null +++ b/re2c/fuzz/tdfa0_tdfa1/__mk.sh @@ -0,0 +1,4 @@ + +[ -x ../re2c ] || { echo "*** no re2c found ***"; exit 1; } + +ghc check.hs -Wall -fforce-recomp -O2 diff --git a/re2c/fuzz/tdfa0_tdfa1/check.hs b/re2c/fuzz/tdfa0_tdfa1/check.hs new file mode 100644 index 00000000..778dc055 --- /dev/null +++ b/re2c/fuzz/tdfa0_tdfa1/check.hs @@ -0,0 +1,124 @@ + +import qualified Test.QuickCheck as Q +import qualified Test.QuickCheck.Monadic as QM +import qualified System.Process as SP +import qualified System.Exit as SE +import qualified Data.ByteString.Char8 as BS +import Data.Char (chr) + + +data E = A | B | C + | Empty + | NA | NB | NC + | Tag String + | HTag String + | Alt E E + | Cat E E + | Star E + | Plus E + | Mayb E + | FromTo Int Int E + | From Int E + + +instance Show E where + show x = case x of + A -> "[a]" + B -> "[b]" + C -> "[c]" + Empty -> "\"\"" + NA -> "[^a]" + NB -> "[^b]" + NC -> "[^c]" + Tag s -> "@t_" ++ s + HTag s -> "#t_" ++ s + Alt l r -> "(" ++ show l ++ "|" ++ show r ++ ")" + Cat l r -> "(" ++ show l ++ show r ++ ")" + Star e -> "(" ++ show e ++ ")*" + Plus e -> "(" ++ show e ++ ")+" + Mayb e -> "(" ++ show e ++ ")?" + FromTo n m e -> "(" ++ show e ++ "){" ++ show n ++ "," ++ show m ++ "}" + From n e -> "(" ++ show e ++ "){" ++ show n ++ ",}" + +instance Q.Arbitrary E where + arbitrary = do + d <- Q.choose (2, 4) :: Q.Gen Int + arbitrary_d d + + +arbitrary_d :: (Enum a, Eq a, Num a) => a -> Q.Gen E +arbitrary_d 0 = do + t1 <- Q.choose (97, 122) :: Q.Gen Int + t2 <- Q.choose (97, 122) :: Q.Gen Int + t3 <- Q.choose (97, 122) :: Q.Gen Int + Q.frequency + [ (1, pure Empty) + , (1, pure A) + , (1, pure B) + , (1, pure C) + , (1, pure NA) + , (1, pure NB) + , (1, pure NC) + , (3, pure $ Tag $ map chr [t1, t2, t3]) + , (3, pure $ HTag $ map chr [t1, t2, t3]) + ] +arbitrary_d d = do + n <- Q.choose (0, 1) :: Q.Gen Int + m <- Q.choose (if n < 2 then n + 1 else n, 3) :: Q.Gen Int + Q.frequency + [ (1, pure Empty) + , (1, pure A) + , (1, pure B) + , (1, pure C) + , (1, pure NA) + , (1, pure NB) + , (1, pure NC) + , (30, Alt <$> arbitrary_d d' <*> arbitrary_d d') + , (30, Cat <$> arbitrary_d d' <*> arbitrary_d d') + , (10, Star <$> arbitrary_d d') + , (10, Plus <$> arbitrary_d d') + , (10, Mayb <$> arbitrary_d d') + , (10, FromTo n m <$> arbitrary_d d') + , (10, From n <$> arbitrary_d d') + ] + where d' = pred d + + +prop_test_re2c :: E -> E -> Q.Property +prop_test_re2c r1 r2 = QM.monadicIO $ do + let re_file = unlines [ "/*!re2c" + , show r1 ++ " {}" + , show r2 ++ " {}" + , "\"\" {}" + , "*/" + ] + re2c = "../re2c" + re2c_flags = " -Werror-undefined-control-flow -ST " + cxx = "gcc" + cxx_flags = " -Werror=incompatible-pointer-types " + + s0 <- QM.run $ do + BS.writeFile "a.re" $ BS.pack re_file + SP.system $ concat + [ "ulimit -t 10 && " + , re2c ++ re2c_flags ++ " --no-lookahead a.re -o a.c 2>>re2c_last_warning " + , "|| exit 42 " + , "&& cp a.c b.c" + ] + + s1 <- QM.run $ do + BS.writeFile "a.re" $ BS.pack re_file + SP.system $ concat + [ "ulimit -t 10 && " + , re2c ++ re2c_flags ++ " a.re -o a.c 2>>re2c_last_warning " + , "|| exit 42 " + , "&& " ++ cxx ++ cxx_flags ++ " a.c -o a && ./a " + , "&& " ++ cxx ++ cxx_flags ++ " b.c -o b && ./b " + ] + + QM.assert $ s0 `elem` [SE.ExitSuccess, SE.ExitFailure 42] + QM.assert $ s1 `elem` [SE.ExitSuccess, SE.ExitFailure 42] + + +main :: IO () +main = Q.quickCheckWith Q.stdArgs { Q.maxSuccess = 1000000 } prop_test_re2c diff --git a/re2c/fuzz/tdfa0_tdfa1_posix/__mk.sh b/re2c/fuzz/tdfa0_tdfa1_posix/__mk.sh new file mode 100755 index 00000000..1025a551 --- /dev/null +++ b/re2c/fuzz/tdfa0_tdfa1_posix/__mk.sh @@ -0,0 +1,4 @@ + +[ -x ../re2c ] || { echo "*** no re2c found ***"; exit 1; } + +ghc check.hs -Wall -fforce-recomp -O2 diff --git a/re2c/fuzz/tdfa0_tdfa1_posix/check.hs b/re2c/fuzz/tdfa0_tdfa1_posix/check.hs new file mode 100644 index 00000000..56bcd3f9 --- /dev/null +++ b/re2c/fuzz/tdfa0_tdfa1_posix/check.hs @@ -0,0 +1,98 @@ + +import qualified Test.QuickCheck as Q +import qualified Test.QuickCheck.Monadic as QM +import qualified System.Process as SP +import qualified System.Exit as SE +import qualified Data.ByteString.Char8 as BS + + +data E = A | B | C + | Empty + | NA | NB | NC + | Alt E E + | Cat E E + | Star E + | Plus E + | Mayb E + | FromTo Int Int E + | From Int E + + +instance Show E where + show x = case x of + A -> "[a]" + B -> "[b]" + C -> "[c]" + Empty -> "\"\"" + NA -> "[^a]" + NB -> "[^b]" + NC -> "[^c]" + Alt l r -> "(" ++ show l ++ "|" ++ show r ++ ")" + Cat l r -> "(" ++ show l ++ show r ++ ")" + Star e -> "(" ++ show e ++ ")*" + Plus e -> "(" ++ show e ++ ")+" + Mayb e -> "(" ++ show e ++ ")?" + FromTo n m e -> "(" ++ show e ++ "){" ++ show n ++ "," ++ show m ++ "}" + From n e -> "(" ++ show e ++ "){" ++ show n ++ ",}" + +instance Q.Arbitrary E where + arbitrary = do + d <- Q.choose (2, 4) :: Q.Gen Int + arbitrary_d d + + +arbitrary_d :: (Enum a, Eq a, Num a) => a -> Q.Gen E +arbitrary_d 0 = do + Q.frequency + [ (1, pure Empty) + , (1, pure A) + , (1, pure B) + , (1, pure C) + , (1, pure NA) + , (1, pure NB) + , (1, pure NC) + ] +arbitrary_d d = do + n <- Q.choose (0, 1) :: Q.Gen Int + m <- Q.choose (if n < 2 then n + 1 else n, 3) :: Q.Gen Int + Q.frequency + [ (1, pure Empty) + , (1, pure A) + , (1, pure B) + , (1, pure C) + , (1, pure NA) + , (1, pure NB) + , (1, pure NC) + , (30, Alt <$> arbitrary_d d' <*> arbitrary_d d') + , (30, Cat <$> arbitrary_d d' <*> arbitrary_d d') + , (10, Star <$> arbitrary_d d') + , (10, Plus <$> arbitrary_d d') + , (10, Mayb <$> arbitrary_d d') + , (10, FromTo n m <$> arbitrary_d d') + , (10, From n <$> arbitrary_d d') + ] + where d' = pred d + + +prop_test_re2c :: E -> E -> Q.Property +prop_test_re2c r1 r2 = QM.monadicIO $ do + let re_file = unlines [ "/*!re2c" + , show r1 ++ " {}" + , show r2 ++ " {}" + , "\"\" {}" + , "*/" + ] + re2c = "../re2c" + + s0 <- QM.run $ do BS.writeFile "a.re" $ BS.pack re_file + SP.system $ "ulimit -t 10 && " ++ re2c ++ " --no-lookahead --posix-captures -Werror-undefined-control-flow -ST a.re -o a.c 2>>re2c_last_warning || exit 42 && cp a.c b.c" + + s1 <- QM.run $ do BS.writeFile "a.re" $ BS.pack re_file + SP.system $ "ulimit -t 10 && " ++ re2c ++ " --posix-captures -Werror-undefined-control-flow -ST a.re -o a.c 2>>re2c_last_warning || exit 42 && gcc a.c -o a && ./a && gcc b.c -o b && ./b" + + QM.assert $ s0 `elem` [SE.ExitSuccess, SE.ExitFailure 42] + QM.assert $ s1 `elem` [SE.ExitSuccess, SE.ExitFailure 42] + + +main :: IO () +main = Q.quickCheckWith Q.stdArgs { Q.maxSuccess = 1000000 } prop_test_re2c -- 2.40.0