\begin{code}
{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE RecordWildCards #-}
{-# LANGUAGE CPP #-}
#if __GLASGOW_HASKELL__ >= 800
{-# LANGUAGE TemplateHaskellQuotes #-}
#else
{-# LANGUAGE QuasiQuotes #-}
{-# LANGUAGE TemplateHaskell #-}
#endif
module Text.RE.ZeInternals.NamedCaptures
( cp
, extractNamedCaptures
, idFormatTokenREOptions
, Token(..)
, validToken
, formatTokens
, formatTokens'
, formatTokens0
, scan
) where
import Data.Char
import qualified Data.HashMap.Strict as HM
import qualified Data.Text as T
import GHC.Generics
import qualified Language.Haskell.TH as TH
import Language.Haskell.TH.Quote
import Text.RE.ZeInternals.PreludeMacros
import Text.RE.ZeInternals.QQ
import Text.RE.ZeInternals.TestBench
import Text.RE.ZeInternals.Tools.Lex
import Text.RE.ZeInternals.Types.CaptureID
import Text.RE.ZeInternals.Types.Match
import Text.RE.ZeInternals.Types.Poss
import Text.Regex.TDFA
cp :: QuasiQuoter
cp :: QuasiQuoter
cp =
(String -> QuasiQuoter
qq0 String
"cp")
{ quoteExp :: String -> Q Exp
quoteExp = String -> Q Exp
parse_capture
}
extractNamedCaptures :: String -> Either String ((Int,CaptureNames),String)
String
s = ((Int, CaptureNames), String)
-> Either String ((Int, CaptureNames), String)
forall a b. b -> Either a b
Right ([Token] -> (Int, CaptureNames)
analyseTokens [Token]
tks,[Token] -> String
formatTokens [Token]
tks)
where
tks :: [Token]
tks = String -> [Token]
scan String
s
\end{code}
Token
\begin{code}
data Token
= ECap (Maybe String)
| PGrp
| PCap
| Bra
| BS Char
| Other Char
deriving (Int -> Token -> ShowS
[Token] -> ShowS
Token -> String
(Int -> Token -> ShowS)
-> (Token -> String) -> ([Token] -> ShowS) -> Show Token
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [Token] -> ShowS
$cshowList :: [Token] -> ShowS
show :: Token -> String
$cshow :: Token -> String
showsPrec :: Int -> Token -> ShowS
$cshowsPrec :: Int -> Token -> ShowS
Show,(forall x. Token -> Rep Token x)
-> (forall x. Rep Token x -> Token) -> Generic Token
forall x. Rep Token x -> Token
forall x. Token -> Rep Token x
forall a.
(forall x. a -> Rep a x) -> (forall x. Rep a x -> a) -> Generic a
$cto :: forall x. Rep Token x -> Token
$cfrom :: forall x. Token -> Rep Token x
Generic,Token -> Token -> Bool
(Token -> Token -> Bool) -> (Token -> Token -> Bool) -> Eq Token
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: Token -> Token -> Bool
$c/= :: Token -> Token -> Bool
== :: Token -> Token -> Bool
$c== :: Token -> Token -> Bool
Eq)
validToken :: Token -> Bool
validToken :: Token -> Bool
validToken Token
tkn = case Token
tkn of
ECap Maybe String
mb -> Bool -> (String -> Bool) -> Maybe String -> Bool
forall b a. b -> (a -> b) -> Maybe a -> b
maybe Bool
True String -> Bool
forall (t :: * -> *). Foldable t => t Char -> Bool
check_ecap Maybe String
mb
Token
PGrp -> Bool
True
Token
PCap -> Bool
True
Token
Bra -> Bool
True
BS Char
c -> Char -> Bool
is_dot Char
c
Other Char
c -> Char -> Bool
is_dot Char
c
where
check_ecap :: t Char -> Bool
check_ecap t Char
s = Bool -> Bool
not (t Char -> Bool
forall (t :: * -> *) a. Foldable t => t a -> Bool
null t Char
s) Bool -> Bool -> Bool
&& (Char -> Bool) -> t Char -> Bool
forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Bool
all Char -> Bool
not_br t Char
s
is_dot :: Char -> Bool
is_dot Char
c = Char
cChar -> Char -> Bool
forall a. Eq a => a -> a -> Bool
/=Char
'\n'
not_br :: Char -> Bool
not_br Char
c = Bool -> Bool
not (Bool -> Bool) -> Bool -> Bool
forall a b. (a -> b) -> a -> b
$ Char
c Char -> String -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` String
"{}\n"
\end{code}
Analysing [Token] -> CaptureNames
\begin{code}
analyseTokens :: [Token] -> (Int,CaptureNames)
analyseTokens :: [Token] -> (Int, CaptureNames)
analyseTokens [Token]
tks0 = case Int -> [Token] -> (Int, [(CaptureName, CaptureOrdinal)])
count_em Int
1 [Token]
tks0 of
(Int
n,[(CaptureName, CaptureOrdinal)]
as) -> (Int
nInt -> Int -> Int
forall a. Num a => a -> a -> a
-Int
1, [(CaptureName, CaptureOrdinal)] -> CaptureNames
forall k v. (Eq k, Hashable k) => [(k, v)] -> HashMap k v
HM.fromList [(CaptureName, CaptureOrdinal)]
as)
where
count_em :: Int -> [Token] -> (Int, [(CaptureName, CaptureOrdinal)])
count_em Int
n [] = (Int
n,[])
count_em Int
n (Token
tk:[Token]
tks) = case Int -> [Token] -> (Int, [(CaptureName, CaptureOrdinal)])
count_em (Int
n Int -> Int -> Int
`seq` Int
nInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
d) [Token]
tks of
(Int
n',[(CaptureName, CaptureOrdinal)]
as) -> (Int
n',[(CaptureName, CaptureOrdinal)]
bd[(CaptureName, CaptureOrdinal)]
-> [(CaptureName, CaptureOrdinal)]
-> [(CaptureName, CaptureOrdinal)]
forall a. [a] -> [a] -> [a]
++[(CaptureName, CaptureOrdinal)]
as)
where
(Int
d,[(CaptureName, CaptureOrdinal)]
bd) = case Token
tk of
ECap (Just nm) -> (,) Int
1 [(Text -> CaptureName
CaptureName (Text -> CaptureName) -> Text -> CaptureName
forall a b. (a -> b) -> a -> b
$ String -> Text
T.pack String
nm,Int -> CaptureOrdinal
CaptureOrdinal Int
n)]
ECap Nothing -> (,) Int
1 []
Token
PGrp -> (,) Int
0 []
Token
PCap -> (,) Int
1 []
Token
Bra -> (,) Int
1 []
BS _ -> (,) Int
0 []
Other _ -> (,) Int
0 []
\end{code}
Scanning Regex Strings
\begin{code}
scan :: String -> [Token]
scan :: String -> [Token]
scan = (Regex -> String -> Match String)
-> [(Regex, Match String -> Maybe Token)]
-> Token
-> String
-> [Token]
forall s re t.
Replace s =>
(re -> s -> Match s) -> [(re, Match s -> Maybe t)] -> t -> s -> [t]
alex' Regex -> String -> Match String
forall regex source target.
RegexContext regex source target =>
regex -> source -> target
match [(Regex, Match String -> Maybe Token)]
al (Token -> String -> [Token]) -> Token -> String -> [Token]
forall a b. (a -> b) -> a -> b
$ String -> Token
forall a. String -> a
oops String
"top"
where
al :: [(Regex,Match String->Maybe Token)]
al :: [(Regex, Match String -> Maybe Token)]
al =
[ String
-> (Match String -> Token) -> (Regex, Match String -> Maybe Token)
forall a compOpt execOpt source a a.
RegexMaker a compOpt execOpt source =>
source -> (a -> a) -> (a, a -> Maybe a)
mk String
"\\$\\{([^{}]+)\\}\\(" ((Match String -> Token) -> (Regex, Match String -> Maybe Token))
-> (Match String -> Token) -> (Regex, Match String -> Maybe Token)
forall a b. (a -> b) -> a -> b
$ Maybe String -> Token
ECap (Maybe String -> Token)
-> (Match String -> Maybe String) -> Match String -> Token
forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> Maybe String
forall a. a -> Maybe a
Just (String -> Maybe String)
-> (Match String -> String) -> Match String -> Maybe String
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Match String -> String
forall a. Match a -> a
x_1
, String
-> (Match String -> Token) -> (Regex, Match String -> Maybe Token)
forall a compOpt execOpt source a a.
RegexMaker a compOpt execOpt source =>
source -> (a -> a) -> (a, a -> Maybe a)
mk String
"\\$\\(" ((Match String -> Token) -> (Regex, Match String -> Maybe Token))
-> (Match String -> Token) -> (Regex, Match String -> Maybe Token)
forall a b. (a -> b) -> a -> b
$ Token -> Match String -> Token
forall a b. a -> b -> a
const (Token -> Match String -> Token) -> Token -> Match String -> Token
forall a b. (a -> b) -> a -> b
$ Maybe String -> Token
ECap Maybe String
forall a. Maybe a
Nothing
, String
-> (Match String -> Token) -> (Regex, Match String -> Maybe Token)
forall a compOpt execOpt source a a.
RegexMaker a compOpt execOpt source =>
source -> (a -> a) -> (a, a -> Maybe a)
mk String
"\\(\\?:" ((Match String -> Token) -> (Regex, Match String -> Maybe Token))
-> (Match String -> Token) -> (Regex, Match String -> Maybe Token)
forall a b. (a -> b) -> a -> b
$ Token -> Match String -> Token
forall a b. a -> b -> a
const Token
PGrp
, String
-> (Match String -> Token) -> (Regex, Match String -> Maybe Token)
forall a compOpt execOpt source a a.
RegexMaker a compOpt execOpt source =>
source -> (a -> a) -> (a, a -> Maybe a)
mk String
"\\(\\?" ((Match String -> Token) -> (Regex, Match String -> Maybe Token))
-> (Match String -> Token) -> (Regex, Match String -> Maybe Token)
forall a b. (a -> b) -> a -> b
$ Token -> Match String -> Token
forall a b. a -> b -> a
const Token
PCap
, String
-> (Match String -> Token) -> (Regex, Match String -> Maybe Token)
forall a compOpt execOpt source a a.
RegexMaker a compOpt execOpt source =>
source -> (a -> a) -> (a, a -> Maybe a)
mk String
"\\(" ((Match String -> Token) -> (Regex, Match String -> Maybe Token))
-> (Match String -> Token) -> (Regex, Match String -> Maybe Token)
forall a b. (a -> b) -> a -> b
$ Token -> Match String -> Token
forall a b. a -> b -> a
const Token
Bra
, String
-> (Match String -> Token) -> (Regex, Match String -> Maybe Token)
forall a compOpt execOpt source a a.
RegexMaker a compOpt execOpt source =>
source -> (a -> a) -> (a, a -> Maybe a)
mk String
"\\\\(.)" ((Match String -> Token) -> (Regex, Match String -> Maybe Token))
-> (Match String -> Token) -> (Regex, Match String -> Maybe Token)
forall a b. (a -> b) -> a -> b
$ Char -> Token
BS (Char -> Token) -> (Match String -> Char) -> Match String -> Token
forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> Char
forall p. [p] -> p
s2c (String -> Char)
-> (Match String -> String) -> Match String -> Char
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Match String -> String
forall a. Match a -> a
x_1
, String
-> (Match String -> Token) -> (Regex, Match String -> Maybe Token)
forall a compOpt execOpt source a a.
RegexMaker a compOpt execOpt source =>
source -> (a -> a) -> (a, a -> Maybe a)
mk String
"(.|\n)" ((Match String -> Token) -> (Regex, Match String -> Maybe Token))
-> (Match String -> Token) -> (Regex, Match String -> Maybe Token)
forall a b. (a -> b) -> a -> b
$ Char -> Token
Other (Char -> Token) -> (Match String -> Char) -> Match String -> Token
forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> Char
forall p. [p] -> p
s2c (String -> Char)
-> (Match String -> String) -> Match String -> Char
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Match String -> String
forall a. Match a -> a
x_1
]
x_1 :: Match a -> a
x_1 = CaptureID -> Match a -> a
forall a. CaptureID -> Match a -> a
captureText (CaptureID -> Match a -> a) -> CaptureID -> Match a -> a
forall a b. (a -> b) -> a -> b
$ CaptureOrdinal -> CaptureID
IsCaptureOrdinal (CaptureOrdinal -> CaptureID) -> CaptureOrdinal -> CaptureID
forall a b. (a -> b) -> a -> b
$ Int -> CaptureOrdinal
CaptureOrdinal Int
1
s2c :: [p] -> p
s2c [p
c] = p
c
s2c [p]
_ = String -> p
forall a. String -> a
oops String
"s2c"
mk :: source -> (a -> a) -> (a, a -> Maybe a)
mk source
s a -> a
f = ((String -> a) -> (a -> a) -> Poss a -> a
forall b a. (String -> b) -> (a -> b) -> Poss a -> b
poss String -> a
forall a. HasCallStack => String -> a
error a -> a
forall a. a -> a
id (Poss a -> a) -> Poss a -> a
forall a b. (a -> b) -> a -> b
$ source -> Poss a
forall regex compOpt execOpt source (m :: * -> *).
(RegexMaker regex compOpt execOpt source, MonadFail m) =>
source -> m regex
makeRegexM source
s,a -> Maybe a
forall a. a -> Maybe a
Just (a -> Maybe a) -> (a -> a) -> a -> Maybe a
forall b c a. (b -> c) -> (a -> b) -> a -> c
. a -> a
f)
oops :: String -> a
oops String
m = String -> a
forall a. HasCallStack => String -> a
error (String -> a) -> String -> a
forall a b. (a -> b) -> a -> b
$ String
"NamedCaptures.scan: " String -> ShowS
forall a. [a] -> [a] -> [a]
++ String
m
\end{code}
Parsing captures
\begin{code}
parse_capture :: String -> TH.Q TH.Exp
parse_capture :: String -> Q Exp
parse_capture String
s = case (Char -> Bool) -> String -> Bool
forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Bool
all Char -> Bool
isDigit String
s of
Bool
True -> [|IsCaptureOrdinal $ CaptureOrdinal $ read s|]
Bool
False -> [|IsCaptureName $ CaptureName $ T.pack s|]
\end{code}
Formatting [Token]
\begin{code}
formatTokens :: [Token] -> String
formatTokens :: [Token] -> String
formatTokens = FormatTokenREOptions -> [Token] -> String
formatTokens' FormatTokenREOptions
defFormatTokenREOptions
data FormatTokenREOptions =
FormatTokenREOptions
{ FormatTokenREOptions -> Maybe RegexType
_fto_regex_type :: Maybe RegexType
, FormatTokenREOptions -> Bool
_fto_min_caps :: Bool
, FormatTokenREOptions -> Bool
_fto_incl_caps :: Bool
}
deriving (Int -> FormatTokenREOptions -> ShowS
[FormatTokenREOptions] -> ShowS
FormatTokenREOptions -> String
(Int -> FormatTokenREOptions -> ShowS)
-> (FormatTokenREOptions -> String)
-> ([FormatTokenREOptions] -> ShowS)
-> Show FormatTokenREOptions
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [FormatTokenREOptions] -> ShowS
$cshowList :: [FormatTokenREOptions] -> ShowS
show :: FormatTokenREOptions -> String
$cshow :: FormatTokenREOptions -> String
showsPrec :: Int -> FormatTokenREOptions -> ShowS
$cshowsPrec :: Int -> FormatTokenREOptions -> ShowS
Show)
defFormatTokenREOptions :: FormatTokenREOptions
defFormatTokenREOptions :: FormatTokenREOptions
defFormatTokenREOptions =
FormatTokenREOptions :: Maybe RegexType -> Bool -> Bool -> FormatTokenREOptions
FormatTokenREOptions
{ _fto_regex_type :: Maybe RegexType
_fto_regex_type = Maybe RegexType
forall a. Maybe a
Nothing
, _fto_min_caps :: Bool
_fto_min_caps = Bool
False
, _fto_incl_caps :: Bool
_fto_incl_caps = Bool
False
}
idFormatTokenREOptions :: FormatTokenREOptions
idFormatTokenREOptions :: FormatTokenREOptions
idFormatTokenREOptions =
FormatTokenREOptions :: Maybe RegexType -> Bool -> Bool -> FormatTokenREOptions
FormatTokenREOptions
{ _fto_regex_type :: Maybe RegexType
_fto_regex_type = Maybe RegexType
forall a. Maybe a
Nothing
, _fto_min_caps :: Bool
_fto_min_caps = Bool
False
, _fto_incl_caps :: Bool
_fto_incl_caps = Bool
True
}
formatTokens' :: FormatTokenREOptions -> [Token] -> String
formatTokens' :: FormatTokenREOptions -> [Token] -> String
formatTokens' FormatTokenREOptions{Bool
Maybe RegexType
_fto_incl_caps :: Bool
_fto_min_caps :: Bool
_fto_regex_type :: Maybe RegexType
_fto_incl_caps :: FormatTokenREOptions -> Bool
_fto_min_caps :: FormatTokenREOptions -> Bool
_fto_regex_type :: FormatTokenREOptions -> Maybe RegexType
..} = (Token -> ShowS) -> String -> [Token] -> String
forall (t :: * -> *) a b.
Foldable t =>
(a -> b -> b) -> b -> t a -> b
foldr Token -> ShowS
f String
""
where
f :: Token -> ShowS
f Token
tk String
tl = String
t_s String -> ShowS
forall a. [a] -> [a] -> [a]
++ String
tl
where
t_s :: String
t_s = case Token
tk of
ECap mb -> Maybe String -> String
ecap Maybe String
mb
Token
PGrp -> if Bool -> (RegexType -> Bool) -> Maybe RegexType -> Bool
forall b a. b -> (a -> b) -> Maybe a -> b
maybe Bool
False RegexType -> Bool
isTDFA Maybe RegexType
_fto_regex_type then String
"(" else String
"(?:"
Token
PCap -> String
"(?"
Token
Bra -> Bool -> String
bra Bool
_fto_min_caps
BS c -> String
"\\" String -> ShowS
forall a. [a] -> [a] -> [a]
++ [Char
c]
Other c -> [Char
c]
ecap :: Maybe String -> String
ecap Maybe String
mb = case Bool
_fto_incl_caps of
Bool
True -> case Maybe String
mb of
Maybe String
Nothing -> String
"$("
Just String
nm -> String
"${"String -> ShowS
forall a. [a] -> [a] -> [a]
++String
nmString -> ShowS
forall a. [a] -> [a] -> [a]
++String
"}("
Bool
False -> Bool -> String
bra Bool
_fto_min_caps
bra :: Bool -> String
bra Bool
mc = case Bool
mc Bool -> Bool -> Bool
&& Bool -> (RegexType -> Bool) -> Maybe RegexType -> Bool
forall b a. b -> (a -> b) -> Maybe a -> b
maybe Bool
False RegexType -> Bool
isPCRE Maybe RegexType
_fto_regex_type of
Bool
True -> String
"(?:"
Bool
False -> String
"("
\end{code}
\begin{code}
formatTokens0 :: [Token] -> String
formatTokens0 :: [Token] -> String
formatTokens0 = (Token -> ShowS) -> String -> [Token] -> String
forall (t :: * -> *) a b.
Foldable t =>
(a -> b -> b) -> b -> t a -> b
foldr Token -> ShowS
f String
""
where
f :: Token -> ShowS
f Token
tk String
tl = String
t_s String -> ShowS
forall a. [a] -> [a] -> [a]
++ String
tl
where
t_s :: String
t_s = case Token
tk of
ECap _ -> String
"("
Token
PGrp -> String
"(?:"
Token
PCap -> String
"(?"
Token
Bra -> String
"("
BS c -> String
"\\" String -> ShowS
forall a. [a] -> [a] -> [a]
++ [Char
c]
Other c -> [Char
c]
\end{code}