Regex Test Suite
================
All of the regex exampes are self-testing and together make up the
regex test suite run during development and over each release of the
test suite. But here we have the unit an small-check tests used to
systematically probe the library for weak points and guard against
regressions.
\begin{code}
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE RecordWildCards #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE QuasiQuotes #-}
{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE DeriveFunctor #-}
{-# LANGUAGE StandaloneDeriving #-}
{-# OPTIONS_GHC -fno-warn-orphans #-}
module Main (main) where
import Control.Exception
import Control.Monad
import Data.Array
import qualified Data.ByteString.Char8 as B
import qualified Data.ByteString.Lazy.Char8 as LBS
import qualified Data.ByteString.Lazy.UTF8 as LBS
import qualified Data.ByteString.UTF8 as B
import Data.Char
import qualified Data.Foldable as F
import qualified Data.HashMap.Strict as HM
import Data.Maybe
import qualified Data.Monoid as M
import qualified Data.Sequence as S
import Data.String
import qualified Data.Text as T
import qualified Data.Text.Lazy as LT
import Data.Typeable
import Language.Haskell.TH.Quote
import Prelude.Compat
import System.Directory
import System.FilePath
import qualified System.Info as SI
import Test.SmallCheck.Series
import Test.Tasty
import Test.Tasty.HUnit
import Test.Tasty.SmallCheck as SC
import TestKit
import Text.Heredoc
import qualified Text.RE.PCRE as PCRE
import qualified Text.RE.PCRE.ByteString as P_BS
import qualified Text.RE.PCRE.ByteString.Lazy as PLBS
import qualified Text.RE.PCRE.Sequence as P_SQ
import qualified Text.RE.PCRE.String as P_ST
import qualified Text.RE.PCRE.Text as P_TX
import qualified Text.RE.PCRE.Text.Lazy as PLTX
import Text.RE.REOptions
import Text.RE.Replace
import Text.RE.TDFA as TDFA
import qualified Text.RE.TDFA.ByteString as T_BS
import qualified Text.RE.TDFA.ByteString.Lazy as TLBS
import qualified Text.RE.TDFA.Sequence as T_SQ
import qualified Text.RE.TDFA.String as T_ST
import qualified Text.RE.TDFA.Text as T_TX
import qualified Text.RE.TDFA.Text.Lazy as TLTX
import Text.RE.TestBench
import Text.RE.Tools.Find
import Text.RE.Tools.Sed
import Text.RE.ZeInternals
import qualified Text.Regex.PCRE as PCRE_
import qualified Text.Regex.TDFA as TDFA_
\end{code}
\begin{code}
main :: IO ()
main = do
print SI.os
defaultMain $
testGroup "Tests"
[ prelude_tests
, compiling_tests
, core_tests
, replace_methods_tests
, search_replace_tests
, options_tests
, named_capture_tests
, many_tests
, escape_tests
, add_capture_names_tests
, find_tests
, backslash_tests
, misc_tests
]
\end{code}
\begin{code}
-- | check that our self-testing macro environments are good
prelude_tests :: TestTree
prelude_tests = testGroup "Prelude"
[ tc TDFA.regexType TDFA.preludeEnv
, tc PCRE.regexType PCRE.preludeEnv
]
where
tc rty m_env =
testCase (show rty) $ do
is_docs <- doesDirectoryExist "docs"
when is_docs $
dumpMacroTable (fp "docs" ".txt") (fp "docs" "-src.txt") rty m_env
dumpMacroTable (fp "data" ".txt") (fp "data" "-src.txt") rty m_env
assertBool "testMacroEnv" =<< testMacroEnv "prelude" rty m_env
where
fp dir sfx = dir > (rty_s ++ "-macros" ++ sfx)
rty_s = map toLower $ presentRegexType rty
\end{code}
Core Match/Replace Tests
------------------------
test vectors
The core tests rely on these simple test vectors.
\begin{code}
-- | our standard test strings
str_, str' :: String
str_ = "a bbbb aa b"
str' = "foo"
-- | standard test REs
regex_, regex_alt :: RE
regex_ = [re|(a+) (b+)|]
regex_alt = [re|(a+)|(b+)|]
-- | golden matches result 1
regex_str_matches :: Matches String
regex_str_matches =
Matches
{ matchesSource = str_
, allMatches =
[ regex_str_match
, regex_str_match_2
]
}
-- | golden match result 1
regex_str_match :: Match String
regex_str_match =
Match
{ matchSource = str_
, captureNames = noCaptureNames
, matchArray = array (0,2)
[ (0,Capture {captureSource = str_, capturedText = "a bbbb", captureOffset = 0, captureLength = 6})
, (1,Capture {captureSource = str_, capturedText = "a" , captureOffset = 0, captureLength = 1})
, (2,Capture {captureSource = str_, capturedText = "bbbb" , captureOffset = 2, captureLength = 4})
]
}
-- | golden match result 2
regex_str_match_2 :: Match String
regex_str_match_2 =
Match
{ matchSource = str_
, captureNames = noCaptureNames
, matchArray = array (0,2)
[ (0,Capture {captureSource = str_, capturedText = "aa b", captureOffset = 7 , captureLength = 4})
, (1,Capture {captureSource = str_, capturedText = "aa" , captureOffset = 7 , captureLength = 2})
, (2,Capture {captureSource = str_, capturedText = "b" , captureOffset = 10, captureLength = 1})
]
}
-- | golden match result 2
regex_alt_str_matches :: Matches String
regex_alt_str_matches =
Matches
{ matchesSource = str_
, allMatches =
[ Match
{ matchSource = str_
, captureNames = noCaptureNames
, matchArray = array (0,2)
[ (0,Capture {captureSource = str_, capturedText = "a", captureOffset = 0, captureLength = 1})
, (1,Capture {captureSource = str_, capturedText = "a", captureOffset = 0, captureLength = 1})
, (2,Capture {captureSource = str_, capturedText = "", captureOffset = -1, captureLength = 0})
]
}
, Match
{ matchSource = str_
, captureNames = noCaptureNames
, matchArray = array (0,2)
[ (0,Capture {captureSource = str_, capturedText = "bbbb", captureOffset = 2 , captureLength = 4})
, (1,Capture {captureSource = str_, capturedText = "" , captureOffset = -1, captureLength = 0})
, (2,Capture {captureSource = str_, capturedText = "bbbb", captureOffset = 2 , captureLength = 4})
]
}
, Match
{ matchSource = str_
, captureNames = noCaptureNames
, matchArray = array (0,2)
[ (0,Capture {captureSource = str_, capturedText = "aa", captureOffset = 7 , captureLength = 2})
, (1,Capture {captureSource = str_, capturedText = "aa", captureOffset = 7 , captureLength = 2})
, (2,Capture {captureSource = str_, capturedText = "" , captureOffset = -1, captureLength = 0})
]
}
, Match
{ matchSource = str_
, captureNames = noCaptureNames
, matchArray = array (0,2)
[ (0,Capture {captureSource = str_, capturedText = "b", captureOffset = 10, captureLength = 1})
, (1,Capture {captureSource = str_, capturedText = "" , captureOffset = -1, captureLength = 0})
, (2,Capture {captureSource = str_, capturedText = "b", captureOffset = 10, captureLength = 1})
]
}
]
}
\end{code}
testing the compileRegex functions
\begin{code}
compiling_tests :: TestTree
compiling_tests = testGroup "Compiling"
[ testCase "complete check (matchM/ByteString)" $ do
r <- TDFA.compileRegex $ reSource regex_
assertEqual "Match" (B.pack <$> regex_str_match) $ B.pack str_ ?=~ r
, testCase "matched (matchM/Text)" $ do
r <- TDFA.compileRegex $ reSource regex_
assertBool "matched" $ matched $ T.pack str_ ?=~ r
, tc "TDFA.String" TDFA.compileRegex
, tc "TDFA.B" $ TDFA.compileRegex . B.unpack
, tc "TDFA.LBS" $ TDFA.compileRegex . LBS.unpack
, tc "TDFA.T" $ TDFA.compileRegex . T.unpack
, tc "TDFA.LT" $ TDFA.compileRegex . LT.unpack
, tc "TDFA.S" $ TDFA.compileRegex . s_toList
, tc "PCRE.String" PCRE.compileRegex
, tc "PCRE.B" $ PCRE.compileRegex . B.unpack
, tc "PCRE.LBS" $ PCRE.compileRegex . LBS.unpack
, tc "PCRE.S" $ PCRE.compileRegex . s_toList
]
where
tc :: IsRegex re s => String -> (s->IO re) -> TestTree
tc lab mk0 = testGroup lab
[ testCase "loop" $ do
r <- mk re_s
assertEqual "RE" re_s $ regexSource r
, testCase "Match" $ do
r <- mk' re_s
assertEqual "Match" (pk <$> regex_str_match) $ matchOnce r $ pk str_
, testCase "Escape" $ do
r <- esc $ pk "foobar"
assertEqual "String" (pk "bar") $ matchSource $ matchOnce r $ pk "bar"
]
where
mk = makeRegex `asTypeOf` mk0
mk' = makeRegexWith minBound `asTypeOf` mk0
esc = makeEscaped id `asTypeOf` mk0
re_s = pk $ reSource regex_
pk = mk_pk mk0
mk_pk :: Replace s' => (s'->IO re') -> String -> s'
mk_pk _ = packR
\end{code}
core tests
\begin{code}
core_tests :: TestTree
core_tests = testGroup "Match"
[ testCase "text (=~~Text.Lazy)" $ do
txt <- LT.pack str_ =~~ [re|(a+) (b+)|] :: IO (LT.Text)
assertEqual "text" txt "a bbbb"
, testCase "multi (=~~/String)" $ do
let sm = str_ =~ regex_ :: Match String
m = capture [cp|0|] sm
assertEqual "captureSource" "a bbbb aa b" $ captureSource m
assertEqual "capturedText" "a bbbb" $ capturedText m
assertEqual "capturePrefix" "" $ capturePrefix m
assertEqual "captureSuffix" " aa b" $ captureSuffix m
, testCase "complete (=~~/ByteString)" $ do
mtch <- B.pack str_ =~~ regex_ :: IO (Match B.ByteString)
assertEqual "Match" mtch $ B.pack <$> regex_str_match
, testCase "complete (all,String)" $ do
let mtchs = str_ =~ regex_ :: Matches String
assertEqual "Matches" mtchs regex_str_matches
, testCase "complete (all,reg_alt)" $ do
let mtchs = str_ =~ regex_alt :: Matches String
assertEqual "Matches" mtchs regex_alt_str_matches
, testCase "complete (=~~,all)" $ do
mtchs <- str_ =~~ regex_ :: IO (Matches String)
assertEqual "Matches" mtchs regex_str_matches
, testCase "fail (all)" $ do
let mtchs = str' =~ regex_ :: Matches String
assertEqual "not.anyMatches" False $ anyMatches mtchs
]
\end{code}
testing the replace functions at different types
\begin{code}
replace_methods_tests :: TestTree
replace_methods_tests = testGroup "Replace"
[ testCase "String/single" $ do
let m = str_ =~ regex_ :: Match String
r = replaceCaptures ALL fmt m
assertEqual "replaceCaptures" r "(0:0:(0:1:a) (0:2:bbbb)) aa b"
, testCase "String/alt" $ do
let ms = str_ =~ regex_ :: Matches String
r = replaceAllCaptures ALL fmt ms
chk r
, testCase "String" $ do
let ms = str_ =~ regex_ :: Matches String
r = replaceAllCaptures ALL fmt ms
chk r
, testCase "ByteString" $ do
let ms = B.pack str_ =~ regex_ :: Matches B.ByteString
r = replaceAllCaptures ALL fmt ms
chk r
, testCase "LBS.ByteString" $ do
let ms = LBS.pack str_ =~ regex_ :: Matches LBS.ByteString
r = replaceAllCaptures ALL fmt ms
chk r
, testCase "Seq Char" $ do
let ms = S.fromList str_ =~ regex_ :: Matches (S.Seq Char)
f = \_ (RELocation i j) Capture{..} -> Just $ S.fromList $
"(" M.<> show i M.<> ":" M.<> show_co j M.<> ":" M.<>
F.toList capturedText M.<> ")"
r = replaceAllCaptures ALL f ms
assertEqual "replaceAllCaptures" r $
S.fromList "(0:0:(0:1:a) (0:2:bbbb)) (1:0:(1:1:aa) (1:2:b))"
, testCase "Text" $ do
let ms = T.pack str_ =~ regex_ :: Matches T.Text
r = replaceAllCaptures ALL fmt ms
chk r
, testCase "LT.Text" $ do
let ms = LT.pack str_ =~ regex_ :: Matches LT.Text
r = replaceAllCaptures ALL fmt ms
chk r
]
where
chk r =
assertEqual
"replaceAllCaptures"
r
"(0:0:(0:1:a) (0:2:bbbb)) (1:0:(1:1:aa) (1:2:b))"
fmt :: (IsString s,Replace s) => a -> RELocation -> Capture s -> Maybe s
fmt _ (RELocation i j) Capture{..} = Just $ "(" M.<> packR (show i) M.<> ":" M.<>
packR (show_co j) M.<> ":" M.<> capturedText M.<> ")"
show_co (CaptureOrdinal j) = show j
\end{code}
SearchReplace
\begin{code}
search_replace_tests :: TestTree
search_replace_tests = testGroup "SearchReplace" $
[ testCase "?=~/ [ed_| ... |]" $ "baz bar foobar" @=? "foo bar foobar" T_ST.?=~/ [ed_|foo///baz|] ()
, testCase "*=~/ [ed_| ... |]" $ "baz bar bazbar" @=? "foo bar foobar" T_ST.*=~/ [ed_|foo///baz|] MultilineSensitive
, testCase "TDFA.ed/String" $ test_ id tdfa_eds
, testCase "PCRE.ed/String" $ test_ id pcre_eds
, testCase "TDFA.ed/B" $ test_ B.pack tdfa_eds
, testCase "PCRE.ed/B" $ test_ B.pack pcre_eds
, testCase "TDFA.ed/LBS" $ test_ LBS.pack tdfa_eds
, testCase "PCRE.ed/LBS" $ test_ LBS.pack pcre_eds
, testCase "TDFA.ed/S" $ test_ S.fromList tdfa_eds
, testCase "PCRE.ed/S" $ test_ S.fromList pcre_eds
, testCase "TDFA.ed/T" $ test_ T.pack tdfa_eds
, testCase "TDFA.ed/LT" $ test_ LT.pack tdfa_eds
, testCase "TDFA.ed/T(d)" $ test_ T.pack tdfa_eds'
, testCase "PCRE.ed/LBS(d)" $ test_ LBS.pack pcre_eds'
, testg "TDFA.op" (T_ST.?=~/) (T_ST.*=~/) tdfa_sr
, testg "PCRE.op" (P_ST.?=~/) (P_ST.*=~/) pcre_sr
, testg "TDFA.op/String" (T_ST.?=~/) (T_ST.*=~/) tdfa_sr_str
, testg "PCRE.op/String" (P_ST.?=~/) (P_ST.*=~/) pcre_sr_str
, testg "TDFA.op/B" (T_BS.?=~/) (T_BS.*=~/) tdfa_sr_b
, testg "PCRE.op/B" (P_BS.?=~/) (P_BS.*=~/) pcre_sr_b
, testg "TDFA.op/LBS" (TLBS.?=~/) (TLBS.*=~/) tdfa_sr_lbs
, testg "PCRE.op/LBS" (PLBS.?=~/) (PLBS.*=~/) pcre_sr_lbs
, testg "TDFA.op/T" (T_TX.?=~/) (T_TX.*=~/) tdfa_sr_t
, testg "PCRE.op/T" (P_TX.?=~/) (P_TX.*=~/) pcre_sr_t
, testg "TDFA.op/LT" (TLTX.?=~/) (TLTX.*=~/) tdfa_sr_lt
, testg "PCRE.op/LT" (PLTX.?=~/) (PLTX.*=~/) pcre_sr_lt
, testG "TDFA.op/S" (T_SQ.?=~/) (T_SQ.*=~/) tdfa_sr_s
, testG "PCRE.op/S" (P_SQ.?=~/) (P_SQ.*=~/) pcre_sr_s
, testu "TDFA.U/String" id (T_ST.*=~/) [T_ST.ed|scientist///boffin|] (T_ST.*=~) [T_ST.re|λ-|]
, testu "TDFA.U/B" B.fromString (T_BS.*=~/) [T_BS.ed|scientist///boffin|] (T_BS.*=~) [T_BS.re|burble|]
, testu "TDFA.U/LBS" LBS.fromString (TLBS.*=~/) [TLBS.ed|scientist///boffin|] (TLBS.*=~) [TLBS.re|burble|]
, testu "TDFA.U/T" T.pack (T_TX.*=~/) [T_TX.ed|scientist///boffin|] (T_TX.*=~) [T_TX.re|λ-|]
, testu "TDFA.U/LT" LT.pack (TLTX.*=~/) [TLTX.ed|scientist///boffin|] (TLTX.*=~) [TLTX.re|λ-|]
, testu "TDFA.U/S" S.fromList (T_SQ.*=~/) [T_SQ.ed|scientist///boffin|] (T_SQ.*=~) [T_SQ.re|λ-|]
] ++ not_win32_for_now
[ testu "PCRE.U/String" id (P_ST.*=~/) [P_ST.ed|scientist///boffin|] (P_ST.*=~) [P_ST.re|λ-|]
, testu "PCRE.U/B" B.fromString (P_BS.*=~/) [P_BS.ed|scientist///boffin|] (P_BS.*=~) [P_BS.re|λ-|]
, testu "PCRE.U/LBS" LBS.fromString (PLBS.*=~/) [PLBS.ed|scientist///boffin|] (PLBS.*=~) [PLBS.re|λ-|]
, testu "PCRE.U/T" T.pack (P_TX.*=~/) [P_TX.ed|scientist///boffin|] (P_TX.*=~) [P_TX.re|λ-|]
, testu "PCRE.U/LT" LT.pack (PLTX.*=~/) [PLTX.ed|scientist///boffin|] (PLTX.*=~) [PLTX.re|λ-|]
, testu "PCRE.U/S" S.fromList (P_SQ.*=~/) [P_SQ.ed|scientist///boffin|] (P_SQ.*=~) [P_SQ.re|burble|]
]
where
not_win32_for_now :: [a] -> [a]
not_win32_for_now = case SI.os == "mingw32" of
True -> const []
False -> id
test_ :: IsRegex re a => (String->a) -> Edits Identity re a -> Assertion
test_ inj eds = inj rsm @=? runIdentity (sed' eds $ inj inp)
testg lab op1 opm sr = testGroup lab
[ testCase "?=~/" $ rs1 @=? inp `op1` sr
, testCase "*=~/" $ rsm @=? inp `opm` sr
]
testG lab op1 opm sr = testGroup lab
[ testCase "?=~/" $ S.fromList rs1 @=? S.fromList inp `op1` sr
, testCase "*=~/" $ S.fromList rsm @=? S.fromList inp `opm` sr
]
testu lab inj op sr qop rex = testGroup lab
[ testCase "*=~/" $ inj unr @=? inj uni `op` sr
, testCase "*=~" $ 1 @=? countMatches (inj uni `qop` rex)
]
inp, rs1, rsm :: IsString a => a
inp = "16/03/2017 01/01/2000\n"
rs1 = "2017-03-16 01/01/2000\n"
rsm = "2017-03-16 2000-01-01\n"
uni, unr :: String
uni = "\x2070E-\8364-\955-scientist-burble"
unr = "\x2070E-\8364-\955-boffin-burble"
tdfa_eds :: IsRegex TDFA.RE a => Edits Identity TDFA.RE a
tdfa_eds = Select [Template tdfa_sr]
pcre_eds :: IsRegex PCRE.RE a => Edits Identity PCRE.RE a
pcre_eds = Select [Template pcre_sr]
tdfa_sr :: IsRegex TDFA.RE a => SearchReplace TDFA.RE a
tdfa_sr = [TDFA.ed|${d}([0-9]{2})/${m}([0-9]{2})/${y}([0-9]{4})///${y}-${m}-${d}|]
pcre_sr :: IsRegex PCRE.RE a => SearchReplace PCRE.RE a
pcre_sr = [PCRE.ed|${d}([0-9]{2})/${m}([0-9]{2})/${y}([0-9]{4})///${y}-${m}-${d}|]
tdfa_sr_str :: SearchReplace TDFA.RE String
tdfa_sr_str = [T_ST.ed|${d}([0-9]{2})/${m}([0-9]{2})/${y}([0-9]{4})///${y}-${m}-${d}|]
pcre_sr_str :: SearchReplace PCRE.RE String
pcre_sr_str = [P_ST.ed|${d}([0-9]{2})/${m}([0-9]{2})/${y}([0-9]{4})///${y}-${m}-${d}|]
tdfa_sr_b :: SearchReplace TDFA.RE B.ByteString
tdfa_sr_b = [T_BS.ed|${d}([0-9]{2})/${m}([0-9]{2})/${y}([0-9]{4})///${y}-${m}-${d}|]
pcre_sr_b :: SearchReplace PCRE.RE B.ByteString
pcre_sr_b = [P_BS.ed|${d}([0-9]{2})/${m}([0-9]{2})/${y}([0-9]{4})///${y}-${m}-${d}|]
tdfa_sr_lbs :: SearchReplace TDFA.RE LBS.ByteString
tdfa_sr_lbs = [TLBS.ed|${d}([0-9]{2})/${m}([0-9]{2})/${y}([0-9]{4})///${y}-${m}-${d}|]
pcre_sr_lbs :: SearchReplace PCRE.RE LBS.ByteString
pcre_sr_lbs = [PLBS.ed|${d}([0-9]{2})/${m}([0-9]{2})/${y}([0-9]{4})///${y}-${m}-${d}|]
tdfa_sr_t :: SearchReplace TDFA.RE T.Text
tdfa_sr_t = [T_TX.ed|${d}([0-9]{2})/${m}([0-9]{2})/${y}([0-9]{4})///${y}-${m}-${d}|]
pcre_sr_t :: SearchReplace PCRE.RE T.Text
pcre_sr_t = [P_TX.ed|${d}([0-9]{2})/${m}([0-9]{2})/${y}([0-9]{4})///${y}-${m}-${d}|]
tdfa_sr_lt :: SearchReplace TDFA.RE LT.Text
tdfa_sr_lt = [TLTX.ed|${d}([0-9]{2})/${m}([0-9]{2})/${y}([0-9]{4})///${y}-${m}-${d}|]
pcre_sr_lt :: SearchReplace PCRE.RE LT.Text
pcre_sr_lt = [PLTX.ed|${d}([0-9]{2})/${m}([0-9]{2})/${y}([0-9]{4})///${y}-${m}-${d}|]
tdfa_sr_s :: SearchReplace TDFA.RE (S.Seq Char)
tdfa_sr_s = [T_SQ.ed|${d}([0-9]{2})/${m}([0-9]{2})/${y}([0-9]{4})///${y}-${m}-${d}|]
pcre_sr_s :: SearchReplace PCRE.RE (S.Seq Char)
pcre_sr_s = [P_SQ.ed|${d}([0-9]{2})/${m}([0-9]{2})/${y}([0-9]{4})///${y}-${m}-${d}|]
tdfa_eds' :: IsRegex TDFA.RE a => Edits Identity TDFA.RE a
tdfa_eds' = Select [Template $ tdfa_csr "${d}([0-9]{2})/${m}([0-9]{2})/${y}([0-9]{4})" "${y}-${m}-${d}"]
pcre_eds' :: IsRegex PCRE.RE a => Edits Identity PCRE.RE a
pcre_eds' = Select [Template $ pcre_csr "${d}([0-9]{2})/${m}([0-9]{2})/${y}([0-9]{4})" "${y}-${m}-${d}"]
tdfa_csr :: IsRegex TDFA.RE s
=> String
-> String
-> SearchReplace TDFA.RE s
tdfa_csr re_s = maybe (error "eek") id . TDFA.compileSearchReplace re_s
pcre_csr :: IsRegex PCRE.RE s
=> String
-> String
-> SearchReplace PCRE.RE s
pcre_csr re_s = maybe (error "eek") id . PCRE.compileSearchReplace re_s
\end{code}
Testing The REOptions
\begin{code}
options_tests :: TestTree
options_tests = testGroup "Simple REOptions"
[ testGroup "TDFA Simple REOptions"
[ testCase "default (MultilineSensitive)" $ assertEqual "#" 2 $
countMatches $ s TDFA.*=~ [TDFA.re|[0-9a-f]{2}$|]
, testCase "MultilineSensitive" $ assertEqual "#" 2 $
countMatches $ s TDFA.*=~ [TDFA.reMultilineSensitive|[0-9a-f]{2}$|]
, testCase "MultilineInsensitive" $ assertEqual "#" 4 $
countMatches $ s TDFA.*=~ [TDFA.reMultilineInsensitive|[0-9a-f]{2}$|]
, testCase "BlockSensitive" $ assertEqual "#" 0 $
countMatches $ s TDFA.*=~ [TDFA.reBlockSensitive|[0-9a-f]{2}$|]
, testCase "BlockInsensitive" $ assertEqual "#" 1 $
countMatches $ s TDFA.*=~ [TDFA.reBlockInsensitive|[0-9a-f]{2}$|]
]
, testGroup "PCRE Simple REOptions"
[ testCase "default (MultilineSensitive)" $ assertEqual "#" 2 $
countMatches $ s PCRE.*=~ [PCRE.re|[0-9a-f]{2}$|]
, testCase "MultilineSensitive" $ assertEqual "#" 2 $
countMatches $ s PCRE.*=~ [PCRE.reMultilineSensitive|[0-9a-f]{2}$|]
, testCase "MultilineInsensitive" $ assertEqual "#" 4 $
countMatches $ s PCRE.*=~ [PCRE.reMultilineInsensitive|[0-9a-f]{2}$|]
, testCase "BlockSensitive" $ assertEqual "#" 0 $
countMatches $ s PCRE.*=~ [PCRE.reBlockSensitive|[0-9a-f]{2}$|]
, testCase "BlockInsensitive" $ assertEqual "#" 1 $
countMatches $ s PCRE.*=~ [PCRE.reBlockInsensitive|[0-9a-f]{2}$|]
]
]
where
s = "0a\nbb\nFe\nA5" :: String
\end{code}
Exercising Our Many APIs
\begin{code}
many_tests :: TestTree
many_tests = testGroup "Many Tests"
[ testCase "PCRE a" $ test_ (PCRE.*=~) (PCRE.?=~) (PCRE.=~) (PCRE.=~~) matchOnce matchMany makeSearchReplace id re_pcre
, testCase "PCRE ByteString" $ test_ (P_BS.*=~) (P_BS.?=~) (P_BS.=~) (P_BS.=~~) matchOnce matchMany makeSearchReplace B.pack re_pcre
, testCase "PCRE ByteString.Lazy" $ test_ (PLBS.*=~) (PLBS.?=~) (PLBS.=~) (PLBS.=~~) matchOnce matchMany makeSearchReplace LBS.pack re_pcre
, testCase "PCRE Sequence" $ test_ (P_SQ.*=~) (P_SQ.?=~) (P_SQ.=~) (P_SQ.=~~) matchOnce matchMany makeSearchReplace S.fromList re_pcre
, testCase "PCRE String" $ test_ (P_ST.*=~) (P_ST.?=~) (P_ST.=~) (P_ST.=~~) matchOnce matchMany makeSearchReplace id re_pcre
, testCase "PCRE Text" $ test_ (P_TX.*=~) (P_TX.?=~) (P_TX.=~) (P_TX.=~~) matchOnce matchMany makeSearchReplace T.pack re_pcre
, testCase "PCRE Text.Lazy" $ test_ (PLTX.*=~) (PLTX.?=~) (PLTX.=~) (PLTX.=~~) matchOnce matchMany makeSearchReplace LT.pack re_pcre
, testCase "TDFA a" $ test_ (TDFA.*=~) (TDFA.?=~) (TDFA.=~) (TDFA.=~~) matchOnce matchMany makeSearchReplace id re_tdfa
, testCase "TDFA ByteString" $ test_ (T_BS.*=~) (T_BS.?=~) (T_BS.=~) (T_BS.=~~) matchOnce matchMany makeSearchReplace B.pack re_tdfa
, testCase "TDFA ByteString.Lazy" $ test_ (TLBS.*=~) (TLBS.?=~) (TLBS.=~) (TLBS.=~~) matchOnce matchMany makeSearchReplace LBS.pack re_tdfa
, testCase "TDFA Sequence" $ test_ (T_SQ.*=~) (T_SQ.?=~) (T_SQ.=~) (T_SQ.=~~) matchOnce matchMany makeSearchReplace S.fromList re_tdfa
, testCase "TDFA String" $ test_ (T_ST.*=~) (T_ST.?=~) (T_ST.=~) (T_ST.=~~) matchOnce matchMany makeSearchReplace id re_tdfa
, testCase "TDFA Text" $ test_ (T_TX.*=~) (T_TX.?=~) (T_TX.=~) (T_TX.=~~) matchOnce matchMany makeSearchReplace T.pack re_tdfa
, testCase "TDFA Text.Lazy" $ test_ (TLTX.*=~) (TLTX.?=~) (TLTX.=~) (TLTX.=~~) matchOnce matchMany makeSearchReplace LT.pack re_tdfa
]
where
test_ :: (IsRegex r s,Show s,Eq s)
=> (s->r->Matches s)
-> (s->r->Match s)
-> (s->r->Matches s)
-> (s->r->Maybe(Match s))
-> (r->s->Match s)
-> (r->s->Matches s)
-> (s->s->Maybe (SearchReplace r s))
-> (String->s)
-> r
-> Assertion
test_ (%*=~) (%?=~) (%=~) (%=~~) mo mm mk_sr0 inj r = do
2 @=? countMatches mtchs
Just txt' @=? matchedText mtch
mtchs @=? mtchs'
mb_mtch @=? Just mtch
mtch @=? mtch''
mtchs @=? mtchs''
txt'' @=? searchReplaceAll (mk_sr re_t tpl) txt
where
mtchs = txt %*=~ r
mtch = txt %?=~ r
mtchs' = txt %=~ r
mb_mtch = txt %=~~ r
mtch'' = mo r txt
mtchs'' = mm r txt
re_t = inj re_s
tpl = inj "${d}/${m}/${y}"
txt = inj "2016-01-09 2015-12-5 2015-10-05"
txt' = inj "2016-01-09"
txt'' = inj "09/01/2016 2015-12-5 05/10/2015"
mk_sr = \r_ t_ -> maybe (error "agh") id $ mk_sr0 r_ t_
re_pcre = fromMaybe oops $ PCRE.compileRegex "[0-9]{4}-[0-9]{2}-[0-9]{2}"
re_tdfa = fromMaybe oops $ TDFA.compileRegex "[0-9]{4}-[0-9]{2}-[0-9]{2}"
re_s = "${y}([0-9]{4})-${m}([0-9]{2})-${d}([0-9]{2})"
oops = error "many_tests"
\end{code}
Testing the RE Escape Functions
-------------------------------
\begin{code}
escape_tests :: TestTree
escape_tests = testGroup "Escape Tests"
[ testGroup "PCRE"
[ testCase "Escaping empty string" $
assertBool "empty string" $
tst P_ST.escape (P_ST.?=~) ""
, testCase "Escaping RE metacharacters" $
assertBool "metacharacters" $
tst P_ST.escape (P_ST.?=~) metacharacters
, localOption (SmallCheckDepth 6) $
SC.testProperty "matched $ ?=~ [re|^escape()$|]" $
tst P_ST.escape (P_ST.?=~)
]
, testGroup "TDFA"
[ testCase "Escaping empty string" $
assertBool "empty string" $
tst T_ST.escape (T_ST.?=~) ""
, testCase "Escaping RE metacharacters" $
assertBool "metacharacters" $
tst T_ST.escape (T_ST.?=~) metacharacters
, localOption (SmallCheckDepth 6) $
SC.testProperty "matched $ ?=~ [re|^escape()$|]" $
tst T_ST.escape (T_ST.?=~)
]
]
where
tst :: ((String->String)->String->Maybe a)
-> (String->a->Match String)
-> String
-> Bool
tst esc0 (%=~) s = matched $ s %=~ esc s
where
esc = un_maybe . esc0 (("^" ++) . (++ "$"))
metacharacters :: String
metacharacters = "^\\.|*+?()[]{}$"
\end{code}
Named Capture Tests
-------------------
\begin{code}
named_capture_tests :: TestTree
named_capture_tests = localOption (SmallCheckDepth 4) $
testGroup "NamedCaptures"
[ format_scan_tests
, analyse_tokens_tests
]
instance Monad m => Serial m Token
format_scan_tests :: TestTree
format_scan_tests =
testGroup "FormatToken/Scan Properties"
[ localOption (SmallCheckDepth 4) $
SC.testProperty "formatTokens == formatTokens0" $
\tks -> formatTokens tks == formatTokens0 tks
, localOption (SmallCheckDepth 4) $
SC.testProperty "scan . formatTokens' idFormatTokenREOptions == id" $
\tks -> all validToken tks ==>
scan (formatTokens' idFormatTokenREOptions tks) == tks
]
analyse_tokens_tests :: TestTree
analyse_tokens_tests =
testGroup "Analysing [Token] Unit Tests"
[ tc [here|foobar|] []
, tc [here||] []
, tc [here|$([0-9]{4})|] []
, tc [here|${x}()|] [(1,"x")]
, tc [here|${}()|] []
, tc [here|${}()${foo}()|] [(2,"foo")]
, tc [here|${x}(${y()})|] [(1,"x")]
, tc [here|${x}(${y}())|] [(1,"x"),(2,"y")]
, tc [here|${a}(${b{}())|] [(1,"a")]
, tc [here|${y}([0-9]{4})-${m}([0-9]{2})-${d}([0-9]{2})|] [(1,"y"),(2,"m"),(3,"d")]
, tc [here|@$(@|\{${name}([^{}]+)\})|] [(2,"name")]
, tc [here|${y}[0-9]{4}|] []
, tc [here|${}([0-9]{4})|] []
]
where
tc s al =
testCase s $ assertEqual "CaptureNames"
(xnc s)
(HM.fromList
[ (CaptureName $ T.pack n,CaptureOrdinal i)
| (i,n)<-al
]
)
xnc = either oops (snd . fst) . extractNamedCaptures
where
oops = error "analyse_tokens_tests: unexpected parse failure"
\end{code}
AddCaptureNames Tests
---------------------
\begin{code}
add_capture_names_tests :: TestTree
add_capture_names_tests = testGroup "AddCaptureNames Tests"
[ test_add_capture_name "Match String" test_match regex_str_match
, test_add_capture_name "Matches String" test_matches regex_str_matches
, test_add_capture_name "Match B.ByteString" test_match $ B.pack <$> regex_str_match
, test_add_capture_name "Matches B.ByteString" test_matches $ B.pack <$> regex_str_matches
, test_add_capture_name "Match LBS.ByteString" test_match $ LBS.pack <$> regex_str_match
, test_add_capture_name "Matches LBS.ByteString" test_matches $ LBS.pack <$> regex_str_matches
, test_add_capture_name "Match T.Text" test_match $ T.pack <$> regex_str_match
, test_add_capture_name "Matches T.Text" test_matches $ T.pack <$> regex_str_matches
, test_add_capture_name "Match LT.Text" test_match $ LT.pack <$> regex_str_match
, test_add_capture_name "Matches LT.Text" test_matches $ LT.pack <$> regex_str_matches
, test_add_capture_name "Match (Seq Char)" test_match $ S.fromList <$> regex_str_match
, test_add_capture_name "Matches (Seq Char)" test_matches $ S.fromList <$> regex_str_matches
]
test_matches :: CaptureNames -> Matches a -> Bool
test_matches cnms = all (test_match cnms) . allMatches
test_match :: CaptureNames -> Match a -> Bool
test_match cnms mtch = captureNames mtch == cnms
test_add_capture_name :: Typeable a
=> String
-> (CaptureNames->a->Bool)
-> a
-> TestTree
test_add_capture_name lab tst x = testCase lab $
assertBool lab $ tst cnms $ addCaptureNames cnms x
where
cnms = HM.fromList
[ (CaptureName "x",1)
, (CaptureName "y",2)
]
\end{code}
The Find Tests
--------------
\begin{code}
find_tests :: TestTree
find_tests = testGroup "Find Tests"
[ testCase "examples/" $ do
fps <- findMatches_ findMethods [re|^re-.*\.lhs|] "examples/"
example_paths @=? filter (not . matched . (?=~ [re|master\.lhs|])) fps
]
example_paths :: [String]
example_paths =
[ "examples/re-gen-cabals.lhs"
, "examples/re-gen-modules.lhs"
, "examples/re-include.lhs"
, "examples/re-nginx-log-processor.lhs"
, "examples/re-prep.lhs"
, "examples/re-sort-imports.lhs"
, "examples/re-tests.lhs"
, "examples/re-top.lhs"
, "examples/re-tutorial-options.lhs"
, "examples/re-tutorial-replacing.lhs"
, "examples/re-tutorial-testbench.lhs"
, "examples/re-tutorial-tools.lhs"
, "examples/re-tutorial.lhs"
]
findMethods :: FindMethods String
findMethods =
FindMethods
{ doesDirectoryExistDM = doesDirectoryExist
, listDirectoryDM = getDirectoryContents
, combineDM = (>)
}
\end{code}
The Backslash Tests
-------------------
\begin{code}
backslash_tests :: TestTree
backslash_tests = testGroup "Backslash Tests"
[ testGroup "PCRE"
[ testCase "backslash-a" $ "\ay" @=? "--foo\ay" P_TX.*=~/ [P_TX.edBlockSensitive|--[^\a]*///|]
, testCase "backslash-b" $ "\by" @=? "--foo\by" P_TX.*=~/ [P_TX.edBlockSensitive|--[^\b]*///|]
, testCase "backslash-f" $ "\fy" @=? "--foo\fy" P_TX.*=~/ [P_TX.edBlockSensitive|--[^\f]*///|]
, testCase "backslash-n" $ "\ny" @=? "--foo\ny" P_TX.*=~/ [P_TX.edBlockSensitive|--[^\n]*///|]
, testCase "backslash-r" $ "\ry" @=? "--foo\ry" P_TX.*=~/ [P_TX.edBlockSensitive|--[^\r]*///|]
, testCase "backslash-t" $ "\ty" @=? "--foo\ty" P_TX.*=~/ [P_TX.edBlockSensitive|--[^\t]*///|]
, testCase "backslash-v" $ "\vy" @=? "--foo\vy" P_TX.*=~/ [P_TX.edBlockSensitive|--[^\v]*///|]
, testCase "backslash-$" $ "$y" @=? "--foo$y" P_TX.*=~/ [P_TX.edBlockSensitive|--[^\$]*///|]
, testCase "backslash-backslash" $ "\\y" @=? "--foo\\y" P_TX.*=~/ [P_TX.edBlockSensitive|--[^\\]*///|]
]
, testGroup "TDFA"
[ testCase "backslash-a" $ "\ay" @=? "--foo\ay" T_TX.*=~/ [T_TX.edBlockSensitive|--[^\a]*///|]
, testCase "backslash-b" $ "\by" @=? "--foo\by" T_TX.*=~/ [T_TX.edBlockSensitive|--[^\b]*///|]
, testCase "backslash-f" $ "\fy" @=? "--foo\fy" T_TX.*=~/ [T_TX.edBlockSensitive|--[^\f]*///|]
, testCase "backslash-n" $ "\ny" @=? "--foo\ny" T_TX.*=~/ [T_TX.edBlockSensitive|--[^\n]*///|]
, testCase "backslash-r" $ "\ry" @=? "--foo\ry" T_TX.*=~/ [T_TX.edBlockSensitive|--[^\r]*///|]
, testCase "backslash-t" $ "\ty" @=? "--foo\ty" T_TX.*=~/ [T_TX.edBlockSensitive|--[^\t]*///|]
, testCase "backslash-v" $ "\vy" @=? "--foo\vy" T_TX.*=~/ [T_TX.edBlockSensitive|--[^\v]*///|]
, testCase "backslash-$" $ "$y" @=? "--foo$y" T_TX.*=~/ [T_TX.edBlockSensitive|--[^\$]*///|]
, testCase "backslash-backslash" $ "\\y" @=? "--foo\\y" T_TX.*=~/ [T_TX.edBlockSensitive|--[^\\]*///|]
]
]
\end{code}
The Miscelaneous Tests
----------------------
\begin{code}
misc_tests :: TestTree
misc_tests = testGroup "Miscelaneous Tests"
[ testGroup "CaptureID"
[ testCase "CaptureID lookup failure" $ do
ok <- isValidError $ unsafe_find_capture_id [cp|foo|] $ reCaptureNames [re|foo|]
assertBool "failed" ok
]
, testGroup "QQ"
[ qq_tc "re" re
, qq_tc "reMS" reMS
, qq_tc "reMI" reMI
, qq_tc "reBS" reBS
, qq_tc "reBI" reBI
, qq_tc "reMultilineSensitive" reMultilineSensitive
, qq_tc "reMultilineInsensitive" reMultilineInsensitive
, qq_tc "reBlockSensitive" reBlockSensitive
, qq_tc "reBlockInsensitive" reBlockInsensitive
, qq_tc "re_" re_
, qq_tc "ed" ed
, qq_tc "edMS" edMS
, qq_tc "edMI" edMI
, qq_tc "edBS" edBS
, qq_tc "edBI" edBI
, qq_tc "edMultilineSensitive" edMultilineSensitive
, qq_tc "edMultilineInsensitive" edMultilineInsensitive
, qq_tc "edBlockSensitive" edBlockSensitive
, qq_tc "edBlockInsensitive" edBlockInsensitive
, qq_tc "ed_" ed_
]
, testGroup "PreludeMacros"
[ valid_string "preludeMacroTable" preludeMacroTable
, valid_macro "preludeMacroSummary" preludeMacroSummary
, valid_string "preludeMacroSources" preludeMacroSources
, valid_macro "preludeMacroSource" preludeMacroSource
]
-- because HPC can't measure our testing of [re|..|] forms,
-- we are eliminating them from our enquiries
, testGroup "RE"
[ valid_res TDFA.regexType
[ TDFA.re
, TDFA.reMS
, TDFA.reMI
, TDFA.reBS
, TDFA.reBI
, TDFA.reMultilineSensitive
, TDFA.reMultilineInsensitive
, TDFA.reBlockSensitive
, TDFA.reBlockInsensitive
, TDFA.re_
, TDFA.ed
, TDFA.edMS
, TDFA.edMI
, TDFA.edBS
, TDFA.edBI
, TDFA.edMultilineSensitive
, TDFA.edMultilineInsensitive
, TDFA.edBlockSensitive
, TDFA.edBlockInsensitive
, TDFA.ed_
]
, testCase "TDFA.regexType" $ assertBool "TDFA" $ isTDFA TDFA.regexType
, testCase "TDFA.reOptions" $ assert_empty_macs $ optionsMacs (TDFA.reOptions tdfa_re)
, testCase "TDFA.makeREOptions md" $ assert_empty_macs $ optionsMacs tdfa_opts
, testCase "TDFA.preludeTestsFailing" $ [] @=? TDFA.preludeTestsFailing
, ne_string "TDFA.preludeTable" TDFA.preludeTable
, ne_string "TDFA.preludeSources" TDFA.preludeSources
, testGroup "TDFA.preludeSummary"
[ ne_string (presentPreludeMacro pm) $ TDFA.preludeSummary pm
| pm <- tdfa_prelude_macros
]
, testGroup "TDFA.preludeSource"
[ ne_string (presentPreludeMacro pm) $ TDFA.preludeSource pm
| pm <- tdfa_prelude_macros
]
-- because HPC can't measure our testing of [re|..|] forms,
-- we are eliminating them from our enquiries
, valid_res PCRE.regexType
[ PCRE.re
, PCRE.reMS
, PCRE.reMI
, PCRE.reBS
, PCRE.reBI
, PCRE.reMultilineSensitive
, PCRE.reMultilineInsensitive
, PCRE.reBlockSensitive
, PCRE.reBlockInsensitive
, PCRE.re_
, PCRE.ed
, PCRE.edMS
, PCRE.edMI
, PCRE.edBS
, PCRE.edBI
, PCRE.edMultilineSensitive
, PCRE.edMultilineInsensitive
, PCRE.edBlockSensitive
, PCRE.edBlockInsensitive
, PCRE.ed_
]
, testCase "PCRE.regexType" $ assertBool "PCRE" $ isPCRE PCRE.regexType
, testCase "PCRE.reOptions" $ assert_empty_macs $ optionsMacs (PCRE.reOptions pcre_re)
, testCase "PCRE.makeREOptions md" $ assert_empty_macs $ optionsMacs pcre_opts
, testCase "PCRE.preludeTestsFailing" $ [] @=? PCRE.preludeTestsFailing
, ne_string "PCRE.preludeTable" PCRE.preludeTable
, ne_string "PCRE.preludeTable" PCRE.preludeSources
, testGroup "PCRE.preludeSummary"
[ ne_string (presentPreludeMacro pm) $ PCRE.preludeSummary pm
| pm <- pcre_prelude_macros
]
, testGroup "PCRE.preludeSource"
[ ne_string (presentPreludeMacro pm) $ PCRE.preludeSource pm
| pm <- pcre_prelude_macros
]
]
]
where
tdfa_re = fromMaybe oops $ TDFA.compileRegexWithOptions tdfa_opts ".*"
pcre_re = fromMaybe oops $ PCRE.compileRegexWithOptions pcre_opts ".*"
tdfa_opts = TDFA.makeREOptions no_macs_t :: REOptions_ TDFA.RE TDFA_.CompOption TDFA_.ExecOption
pcre_opts = PCRE.makeREOptions no_macs_p :: REOptions_ PCRE.RE PCRE_.CompOption PCRE_.ExecOption
no_macs_t = HM.fromList [] :: Macros TDFA.RE
no_macs_p = HM.fromList [] :: Macros PCRE.RE
oops = error "misc_tests"
assert_empty_macs = assertBool "macros not empty" . HM.null
qq_tc :: String -> QuasiQuoter -> TestTree
qq_tc lab qq = testCase lab $ quoteExp qq `seq` assertBool "qq_tc" True
valid_macro :: String -> (RegexType->PreludeMacro->String) -> TestTree
valid_macro label f = testGroup label
[ valid_string (presentPreludeMacro pm) (flip f pm)
| pm<-[minBound..maxBound]
]
valid_string :: String -> (RegexType->String) -> TestTree
valid_string label f = testGroup label
[ ne_string (presentRegexType rty) $ f rty
| rty<-[TDFA.regexType] -- until PCRE has a binding for all macros
]
ne_string :: String -> String -> TestTree
ne_string label s =
testCase label $ assertBool "non-empty string" $ length s > 0
-- just evaluating quasi quoters to HNF for now -- they
-- being tested everywhere [re|...|] (etc.) calculations
-- are bings used but HPC isn't measuring this
valid_res :: RegexType -> [QuasiQuoter] -> TestTree
valid_res rty = testCase (show rty) . foldr seq (return ())
pcre_prelude_macros :: [PreludeMacro]
pcre_prelude_macros = filter (/= PM_string) [minBound..maxBound]
tdfa_prelude_macros :: [PreludeMacro]
tdfa_prelude_macros = [minBound..maxBound]
s_toList :: S.Seq Char -> [Char]
s_toList = F.toList
newtype Identity a = Identity { runIdentity :: a }
deriving (Functor)
instance Applicative Identity where
pure = Identity
(<*>) (Identity f) (Identity x) = Identity $ f x
instance Monad Identity where
return = Identity
(>>=) (Identity x) f = f x
isValidError :: a -> IO Bool
isValidError x = catch (x `seq` return False) hdl
where
hdl :: SomeException -> IO Bool
hdl se = return $ (length $ show se) `seq` True
unsafe_find_capture_id :: CaptureID -> CaptureNames -> CaptureOrdinal
unsafe_find_capture_id cid = either error id . findCaptureID cid
un_maybe :: Maybe a -> a
un_maybe = maybe (error "urk") id
\end{code}