{-# LANGUAGE OverloadedStrings #-} module Main where import Control.Exception (evaluate) import System.Random import Test.Hspec import Test.HUnit.Lang import Control.Monad (when) import Text.Regex.PCRE import qualified Data.ByteString.Char8 as S import Text.Regex.Regen shouldGenOpts :: StdGen -> Options -> S.ByteString -> Expectation shouldGenOpts gen o reg = do p <- parsePatternOpts o reg case let (e,_) = generatePattern p gen in e of Left e -> assertFailure (eThrown e) Right bs -> do bs' <- evaluate $ bs =~ reg when (bs /= bs') $ assertFailure (eUnexp bs bs') where eThrown e = "`generate` threw exception on " ++ show reg ++ ": " ++ show e eUnexp bs bs' = show reg ++ " generated " ++ show bs ++ " but matched " ++ show bs' main :: IO () main = do seed <- randomIO let gen = mkStdGen seed shouldGen = shouldGenOpts gen defaultOptions hspec $ test seed shouldGen test :: Int -> (S.ByteString -> Expectation) -> Spec test seed f = describe ("generate (seed = " ++ show seed ++ ")") $ mapM_ ($f) [ literals , classes , subpatterns , quantifiers , anchors , globalOpts , localOpts , verbs , callouts , lookarounds , calls ] literals :: (S.ByteString -> Expectation) -> Spec literals shouldGen = describe "handles literals and escapes" $ do it "accepts a literal string" $ shouldGen "foo" it "accepts an escaped literal" $ shouldGen "\\F\\H\\8\\9" it "accepts hex escapes" $ shouldGen "\\x0\\x61\\x{62}\\x7f" it "accepts octal escapes" $ shouldGen "\\0\\o{141}\\177" it "accepts whitespace escape" $ shouldGen "\\R" it "accepts C escapes" $ shouldGen "\\a\\n\\f\\t" it "accepts escaped specials" $ shouldGen "\\[\\(\\|\\$\\^\\.\\*" it "rejects out-of-range hex escapes" $ generate "\\x80" `shouldThrow` isEParserError it "rejects out-of-range octal escapes" $ generate "\\200" `shouldThrow` isEParserError it "rejects '\\C'" $ generate "\\C" `shouldThrow` isENotSupported it "rejects '\\X'" $ generate "\\X" `shouldThrow` isENotSupported it "rejects '\\L'" $ generate "\\L" `shouldThrow` isENotSupported it "rejects '\\l'" $ generate "\\l" `shouldThrow` isENotSupported it "rejects '\\U'" $ generate "\\U" `shouldThrow` isENotSupported it "rejects '\\u'" $ generate "\\u" `shouldThrow` isENotSupported it "rejects '\\Q'" $ generate "\\Q" `shouldThrow` isENotSupported it "rejects '\\E'" $ generate "\\E" `shouldThrow` isENotSupported it "rejects '\\o'" $ generate "\\o" `shouldThrow` isEParserError it "rejects '\\cx'" $ generate "\\c" `shouldThrow` isENotSupported classes :: (S.ByteString -> Expectation) -> Spec classes shouldGen = describe "handles character classes" $ do it "accepts '.'" $ shouldGen "." it "accepts character class" $ shouldGen "[asdf]" it "accepts negated class" $ shouldGen "[^asdf]" it "accepts class with '-'" $ shouldGen "[-]" it "accepts negated class with '-'" $ shouldGen "[^-]" it "accepts class with ']'" $ shouldGen "[]]" it "accepts negated class with ']'" $ shouldGen "[^]]" it "accepts class range" $ shouldGen "[a-c]" it "accepts multiple-range class" $ shouldGen "[a-cd-f]" it "accepts negated class range" $ shouldGen "[^A-z]" it "accepts canned range in class" $ shouldGen "[\\d]" it "accepts negated canned range" $ shouldGen "[^\\d]" it "accepts posix class" $ shouldGen "[[:alpha:]]" it "accepts negated posix class" $ shouldGen "[^[:alpha:]]" it "accepts class with everything" $ shouldGen "[]\\wxa-b[:digit:]-]" it "accepts partial posix class" $ shouldGen "[[:f]" it "accepts class with repeats" $ shouldGen "[ooo]" it "accepts class with escapes" $ shouldGen "[\\x61\\141\\0\\n\\A]" it "accepts canned classes" $ shouldGen "\\d\\D\\w\\W" it "rejects malformed class ranges" $ generate "[z-a]" `shouldThrow` isEClassOutOfOrder it "rejects good property ranges" $ generate "\\p{Ll}" `shouldThrow` isENotSupported it "rejects bad property ranges" $ generate "\\p{bad}" `shouldThrow` isENotSupported it "rejects unicode '\\X'" $ generate "\\X" `shouldThrow` isENotSupported it "rejects unicode '\\C'" $ generate "\\C" `shouldThrow` isENotSupported it "rejects posix range out of class" $ generate "[:alpha:]" `shouldThrow` isENotSupported it "rejects bad posix classes" $ generate "[[:foo:]]" `shouldThrow` isENoSuchClass subpatterns :: (S.ByteString -> Expectation) -> Spec subpatterns shouldGen = describe "handles subpatterns" $ do it "accepts numbered groups" $ shouldGen "(x)\\1" it "accepts each numbered ref syntax" $ shouldGen "(x)\\1\\g1\\g{1}" it "accepts nested numbered groups" $ shouldGen "((x)y)\\1\\2" it "accepts named groups" $ shouldGen "(?Px)(?P=foo)" it "accepts numbered refs to named groups" $ shouldGen "(?Px)\\1" it "accepts non-capturing groups" $ shouldGen "(?:foo)(x)\\1" it "accepts atomic groups" $ shouldGen "(?>foo)(x)\\1" it "accepts relative group refs" $ shouldGen "(x)(y)(z)\\g{-2}\\g{-1}\\g{-3}" it "accepts nested named groups" $ shouldGen "(?P(?Px)y)(?P=xy)(?P=x)" it "accepts each named group syntax" $ shouldGen "(?x)(?'y'y)(?Pz)(?P=x)(?P=z)(?P=y)" it "accepts each named ref syntax" $ shouldGen "(?x)\\k'x'\\k\\k{x}\\g{x}(?P=x)" it "rejects duplicate ref names" $ generate "(?x)(?y)" `shouldThrow` isEDupGroupName it "rejects refs to non-existent group numbers" $ generate "\\g1" `shouldThrow` isENoSuchIndex it "rejects refs to non-existent group names" $ generate "(?P=foo)" `shouldThrow` isENoSuchIndex it "rejects bad group names" $ generate "(?P<9foo>)" `shouldThrow` isEBadGroupName it "rejects bad group names in references" $ generate "(?P=9foo)" `shouldThrow` isEBadGroupName it "rejects reset groups" $ generate "(?|foo)" `shouldThrow` isENotSupported quantifiers :: (S.ByteString -> Expectation) -> Spec quantifiers shouldGen = describe "handles quantifiers" $ do it "accepts fixed quantifiers" $ shouldGen "a{5}" it "accepts bounded quantifiers" $ shouldGen "a{1,3}" it "accepts upper-bounded quantifiers" $ shouldGen "a{,3}" it "accepts lower-bounded quantifiers" $ shouldGen "a{3,}" it "accepts star quantifer" $ shouldGen "a*" it "accepts plus quantifer" $ shouldGen "a+" it "accepts question-mark quantifier" $ shouldGen "a?" it "accepts quantified char classes" $ shouldGen "[asdf]{1,3}" it "accepts quantified C escapes" $ shouldGen "\\a{1,3}" it "accepts quantified canned classes" $ shouldGen "\\d{1,3}" it "accepts quantified hex escapes" $ shouldGen "\\x61{1,3}" it "accepts quantified octal escapes" $ shouldGen "\\141{1,3}" it "accepts quantified dot" $ shouldGen ".{1,3}" it "accepts quantified groups" $ shouldGen "(asdf){1,3}" it "rejects lazy quantifiers" $ generate "a{1,3}?a" `shouldThrow` isENotSupported it "rejects possessive quantifiers" $ generate "a{1,3}+a" `shouldThrow` isENotSupported it "rejects out-of-order ranges in quantifiers" $ generate "a{3,1}" `shouldThrow` isEQuantOutOfOrder it "rejects naked quantifiers" $ generate "{1,3}" `shouldThrow` isEParserError it "rejects quantifier after quantifier" $ generate "a{1,3}{1,3}" `shouldThrow` isEParserError it "rejects quantifier after anchor" $ generate "^{1,3}" `shouldThrow` isEParserError anchors :: (S.ByteString -> Expectation) -> Spec anchors shouldGen = describe "handles anchors" $ do describe "carat" $ do it "works at start of pattern" $ shouldGen "^foo" it "works in alternation" $ shouldGen "aa|^zz" it "fails within pattern" $ generate "a^" `shouldThrow` isEGenError describe "dollar" $ do it "works at end of pattern" $ shouldGen "foo$" it "works in alternation" $ shouldGen "aa$|zz" it "works before newlines" $ shouldGen "foo$\\n\\n" it "fails within pattern" $ generate "$a" `shouldThrow` isEGenError describe "\\A" $ do it "works at start of pattern" $ shouldGen "\\Afoo" it "works in alteration" $ shouldGen "aa|\\Azz" it "fails within pattern" $ generate "a\\A" `shouldThrow` isEGenError describe "\\Z" $ do it "works at end of pattern" $ shouldGen "foo\\Z" it "works in alternation" $ shouldGen "aa\\Z|zz" it "fails within pattern" $ generate "\\Za" `shouldThrow` isEGenError describe "\\z" $ do it "works at end of pattern" $ shouldGen "foo\\z" it "works in alternation" $ shouldGen "aa\\z|zz" it "fails within pattern" $ generate "\\za" `shouldThrow` isEGenError describe "\\b" $ do it "works before space" $ shouldGen "foo\\b bar" it "works after space" $ shouldGen "foo \\bbar" it "works at start of pattern" $ shouldGen "\\baa" it "works at end of pattern" $ shouldGen "zz\\b" it "fails within word" $ generate "foo\\bbar" `shouldThrow` isEGenError describe "\\B" $ do it "works within word" $ shouldGen "foo\\Bbar" it "fails before space" $ generate "foo\\B bar" `shouldThrow` isEGenError it "fails after space" $ generate "foo \\Bbar" `shouldThrow` isEGenError it "fails at start of pattern" $ generate "\\Baa" `shouldThrow` isEGenError it "fails at end of pattern" $ generate "zz\\B" `shouldThrow` isEGenError describe "\\G" $ do it "is unsupported" $ generate "foo\\G." `shouldThrow` isENotSupported globalOpts :: (S.ByteString -> Expectation) -> Spec globalOpts shouldGen = describe "global options" $ do it "ignores 'LIMIT_MATCH='" $ shouldGen "(*LIMIT_MATCH=10)foo" it "ignores 'LIMIT_RECURSION='" $ shouldGen "(*LIMIT_RECURSION=10)foo" it "ignores 'NO_AUTO_POSSESS'" $ shouldGen "(*NO_AUTO_POSSESS)foo" it "ignores 'NO_START_OPT'" $ shouldGen "(*NO_START_OPT)foo" it "ignores 'BSR_ANYCRLF'" $ shouldGen "(*BSR_ANYCRLF)foo" it "allows 'LF'" $ shouldGen "(*LF)foo" it "allows 'CR'" $ shouldGen "(*CR)foo" it "allows 'ANY'" $ shouldGen "(*ANY)foo" it "allows 'ANYCRLF'" $ shouldGen "(*ANYCRLF)foo" it "rejects 'CRLF'" $ generate "(*CRLF)foo" `shouldThrow` isENotSupported it "rejects 'BSR_UNICODE'" $ generate "(*BSR_UNICODE)foo" `shouldThrow` isENotSupported it "rejects 'UTF8'" $ generate "(*UTF8)foo" `shouldThrow` isENotSupported it "rejects 'UTF16'" $ generate "(*UTF16)foo" `shouldThrow` isENotSupported it "rejects 'UTF32'" $ generate "(*UTF32)foo" `shouldThrow` isENotSupported it "rejects 'UTF'" $ generate "(*UTF)foo" `shouldThrow` isENotSupported it "rejects 'UCP'" $ generate "(*UCP)foo" `shouldThrow` isENotSupported it "rejects bad global options" $ generate "(*BLAH)foo" `shouldThrow` isEParserError localOpts :: (S.ByteString -> Expectation) -> Spec localOpts shouldGen = describe "local options" $ do it "allows setting ignore-case" $ shouldGen "(?i)asdf[asdf]{5}" it "allows unsetting ignore-case" $ shouldGen "(?i)asdf(?-i)asdf[asdf]{5}" it "allows unsetting multiline" $ shouldGen "(?-m)foo" it "allows unsetting dup-groups" $ shouldGen "(?-J)foo" it "allows setting dotall" $ shouldGen "(?s).{100}" it "allows unsetting dotall" $ shouldGen "(?s).{100}(?-s).{100}" it "allows unsetting ungreedy" $ shouldGen "(?-U)foo" it "allows setting free-spacing" $ shouldGen "(?x)a s\\td\\nf(?#x)g" it "allows unsetting free-spacing" $ shouldGen "(?x)a s(?-x)a s\\td\\nf(?#x)g" it "allows setting multi options" $ shouldGen "(?ix)a s\\td\\nf(?#x)g" it "allows unsetting multi options" $ shouldGen "(?ix)(?-ix)a s\\td\\nf(?#x)g" it "allows setting and unsetting" $ shouldGen "(?i)(?x-i)a s\\td\\nf(?#x)g" it "rejects setting dup-groups" $ generate "(?J)foo" `shouldThrow` isENotSupported it "rejects setting ungreedy" $ generate "(?U)foo" `shouldThrow` isENotSupported verbs :: (S.ByteString -> Expectation) -> Spec verbs _ = describe "backtracking verbs" $ do it "rejects 'ACCEPT' verb" $ generate "foo(*ACCEPT)bar" `shouldThrow` isENotSupported it "rejects 'FAIL' verb" $ generate "foo(*FAIL)bar" `shouldThrow` isENotSupported it "rejects 'F' verb" $ generate "foo(*F)bar" `shouldThrow` isENotSupported it "rejects 'COMMIT' verb" $ generate "foo(*COMMIT)bar" `shouldThrow` isENotSupported it "rejects 'PRUNE' verb" $ generate "foo(*PRUNE)bar" `shouldThrow` isENotSupported it "rejects 'SKIP' verb" $ generate "foo(*SKIP)bar" `shouldThrow` isENotSupported it "rejects 'MARK:' verb" $ generate "foo(*MARK:blah)bar" `shouldThrow` isENotSupported it "rejects ':' verb" $ generate "foo(*:blah)bar" `shouldThrow` isENotSupported it "rejects 'COMMIT:' verb" $ generate "foo(*COMMIT:blah)bar" `shouldThrow` isENotSupported it "rejects 'PRUNE:' verb" $ generate "foo(*PRUNE:blah)bar" `shouldThrow` isENotSupported it "rejects 'SKIP:' verb" $ generate "foo(*SKIP:blah)bar" `shouldThrow` isENotSupported callouts :: (S.ByteString -> Expectation) -> Spec callouts _ = describe "callouts" $ do it "rejects callouts" $ generate "(?C)" `shouldThrow` isENotSupported it "rejects callouts with arguments" $ generate "(?C42)" `shouldThrow` isENotSupported lookarounds :: (S.ByteString -> Expectation) -> Spec lookarounds _ = describe "lookarounds" $ do it "rejects positive lookahead" $ generate "(?=blah)" `shouldThrow` isENotSupported it "rejects negative lookahead" $ generate "(?!blah)" `shouldThrow` isENotSupported it "rejects positive lookbehind" $ generate "(?<=blah)" `shouldThrow` isENotSupported it "rejects negative lookbehind" $ generate "(? Expectation) -> Spec calls _ = describe "pattern calls" $ do it "rejects global recursive calls" $ generate "(?R)" `shouldThrow` isENotSupported it "rejects absolutely numbered calls with (?n)" $ generate "(...)(?1)" `shouldThrow` isENotSupported it "rejects relatively numbered calls with (?-n)" $ generate "(...)(?-1)" `shouldThrow` isENotSupported it "rejects relatively numbered calls with (?+n)" $ generate "(x)(?+1)(...)" `shouldThrow` isENotSupported it "rejects absolutely numbered calls with \\g'n'" $ generate "(...)\\g'1'" `shouldThrow` isENotSupported it "rejects relatively numbered calls with \\g'-n'" $ generate "(...)\\g'-1'" `shouldThrow` isENotSupported it "rejects relatively numbered calls with \\g'+n'" $ generate "(x)\\g'+1'(...)" `shouldThrow` isENotSupported it "rejects absolutely numbered calls with \\g" $ generate "(...)\\g<1>" `shouldThrow` isENotSupported it "rejects relatively numbered calls with \\g<-n>" $ generate "(...)\\g<-1>" `shouldThrow` isENotSupported it "rejects relatively numbered calls with \\g<+n>" $ generate "(x)\\g<+1>(...)" `shouldThrow` isENotSupported it "rejects named calls with (?&)" $ generate "(?...)(?&x)" `shouldThrow` isENotSupported it "rejects named calls with (?P>)" $ generate "(?...)(?P>x)" `shouldThrow` isENotSupported it "rejects named calls with \\g''" $ generate "(?...)\\g'x'" `shouldThrow` isENotSupported it "rejects named calls with \\g<>" $ generate "(?...)\\g" `shouldThrow` isENotSupported