\begin{code}
{-# LANGUAGE DeriveGeneric              #-}
{-# LANGUAGE RecordWildCards            #-}
{-# LANGUAGE CPP                        #-}
#if __GLASGOW_HASKELL__ >= 800
{-# LANGUAGE TemplateHaskellQuotes      #-}
#else
{-# LANGUAGE QuasiQuotes                #-}
{-# LANGUAGE TemplateHaskell            #-}
#endif

module Text.RE.ZeInternals.NamedCaptures
  ( cp
  , extractNamedCaptures
  , idFormatTokenREOptions
  , Token(..)
  , validToken
  , formatTokens
  , formatTokens'
  , formatTokens0
  , scan
  ) where

import           Data.Char
import qualified Data.HashMap.Strict          as HM
import qualified Data.Text                    as T
import           GHC.Generics
import qualified Language.Haskell.TH          as TH
import           Language.Haskell.TH.Quote
import           Text.RE.ZeInternals.PreludeMacros
import           Text.RE.ZeInternals.QQ
import           Text.RE.ZeInternals.TestBench
import           Text.RE.ZeInternals.Tools.Lex
import           Text.RE.ZeInternals.Types.CaptureID
import           Text.RE.ZeInternals.Types.Match
import           Text.RE.ZeInternals.Types.Poss
import           Text.Regex.TDFA


-- | quasi quoter for CaptureID: @[cp|0|]@, @[cp|0|]@, etc.,
-- indexing captures by classic positional numbers, and @[cp|foo|]@,
-- etc., referencing a named capture @[re| ... ${foo}( ... ) ... |]@.
cp :: QuasiQuoter
cp :: QuasiQuoter
cp =
    (String -> QuasiQuoter
qq0 String
"cp")
      { quoteExp :: String -> Q Exp
quoteExp = String -> Q Exp
parse_capture
      }

-- | extract the CaptureNames from an RE or return an error diagnostic
-- if the RE is not well formed; also returns the total number of captures
-- in the RE
extractNamedCaptures :: String -> Either String ((Int,CaptureNames),String)
extractNamedCaptures :: String -> Either String ((Int, CaptureNames), String)
extractNamedCaptures String
s = ((Int, CaptureNames), String)
-> Either String ((Int, CaptureNames), String)
forall a b. b -> Either a b
Right ([Token] -> (Int, CaptureNames)
analyseTokens [Token]
tks,[Token] -> String
formatTokens [Token]
tks)
  where
    tks :: [Token]
tks = String -> [Token]
scan String
s
\end{code}


Token
-----

\begin{code}
-- | our RE scanner returns a list of these tokens
data Token
  = ECap (Maybe String)
  | PGrp
  | PCap
  | Bra
  | BS          Char
  | Other       Char
  deriving (Int -> Token -> ShowS
[Token] -> ShowS
Token -> String
(Int -> Token -> ShowS)
-> (Token -> String) -> ([Token] -> ShowS) -> Show Token
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [Token] -> ShowS
$cshowList :: [Token] -> ShowS
show :: Token -> String
$cshow :: Token -> String
showsPrec :: Int -> Token -> ShowS
$cshowsPrec :: Int -> Token -> ShowS
Show,(forall x. Token -> Rep Token x)
-> (forall x. Rep Token x -> Token) -> Generic Token
forall x. Rep Token x -> Token
forall x. Token -> Rep Token x
forall a.
(forall x. a -> Rep a x) -> (forall x. Rep a x -> a) -> Generic a
$cto :: forall x. Rep Token x -> Token
$cfrom :: forall x. Token -> Rep Token x
Generic,Token -> Token -> Bool
(Token -> Token -> Bool) -> (Token -> Token -> Bool) -> Eq Token
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: Token -> Token -> Bool
$c/= :: Token -> Token -> Bool
== :: Token -> Token -> Bool
$c== :: Token -> Token -> Bool
Eq)

-- | check that a token is well formed
validToken :: Token -> Bool
validToken :: Token -> Bool
validToken Token
tkn = case Token
tkn of
    ECap  Maybe String
mb -> Bool -> (String -> Bool) -> Maybe String -> Bool
forall b a. b -> (a -> b) -> Maybe a -> b
maybe Bool
True String -> Bool
forall (t :: * -> *). Foldable t => t Char -> Bool
check_ecap Maybe String
mb
    Token
PGrp     -> Bool
True
    Token
PCap     -> Bool
True
    Token
Bra      -> Bool
True
    BS    Char
c  -> Char -> Bool
is_dot Char
c
    Other Char
c  -> Char -> Bool
is_dot Char
c
  where
    check_ecap :: t Char -> Bool
check_ecap t Char
s = Bool -> Bool
not (t Char -> Bool
forall (t :: * -> *) a. Foldable t => t a -> Bool
null t Char
s) Bool -> Bool -> Bool
&& (Char -> Bool) -> t Char -> Bool
forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Bool
all Char -> Bool
not_br t Char
s
    is_dot :: Char -> Bool
is_dot     Char
c = Char
cChar -> Char -> Bool
forall a. Eq a => a -> a -> Bool
/=Char
'\n'
    not_br :: Char -> Bool
not_br     Char
c = Bool -> Bool
not (Bool -> Bool) -> Bool -> Bool
forall a b. (a -> b) -> a -> b
$ Char
c Char -> String -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` String
"{}\n"

\end{code}


Analysing [Token] -> CaptureNames
---------------------------------

\begin{code}
-- | analyse a token stream, returning the number of captures and the
-- 'CaptureNames'
analyseTokens :: [Token] -> (Int,CaptureNames)
analyseTokens :: [Token] -> (Int, CaptureNames)
analyseTokens [Token]
tks0 = case Int -> [Token] -> (Int, [(CaptureName, CaptureOrdinal)])
count_em Int
1 [Token]
tks0 of
    (Int
n,[(CaptureName, CaptureOrdinal)]
as) -> (Int
nInt -> Int -> Int
forall a. Num a => a -> a -> a
-Int
1, [(CaptureName, CaptureOrdinal)] -> CaptureNames
forall k v. (Eq k, Hashable k) => [(k, v)] -> HashMap k v
HM.fromList [(CaptureName, CaptureOrdinal)]
as)
  where
    count_em :: Int -> [Token] -> (Int, [(CaptureName, CaptureOrdinal)])
count_em Int
n []       = (Int
n,[])
    count_em Int
n (Token
tk:[Token]
tks) = case Int -> [Token] -> (Int, [(CaptureName, CaptureOrdinal)])
count_em (Int
n Int -> Int -> Int
`seq` Int
nInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
d) [Token]
tks of
        (Int
n',[(CaptureName, CaptureOrdinal)]
as) -> (Int
n',[(CaptureName, CaptureOrdinal)]
bd[(CaptureName, CaptureOrdinal)]
-> [(CaptureName, CaptureOrdinal)]
-> [(CaptureName, CaptureOrdinal)]
forall a. [a] -> [a] -> [a]
++[(CaptureName, CaptureOrdinal)]
as)
      where
        (Int
d,[(CaptureName, CaptureOrdinal)]
bd) = case Token
tk of
          ECap (Just nm) -> (,) Int
1 [(Text -> CaptureName
CaptureName (Text -> CaptureName) -> Text -> CaptureName
forall a b. (a -> b) -> a -> b
$ String -> Text
T.pack String
nm,Int -> CaptureOrdinal
CaptureOrdinal Int
n)]
          ECap  Nothing  -> (,) Int
1 []
          Token
PGrp           -> (,) Int
0 []
          Token
PCap           -> (,) Int
1 []
          Token
Bra            -> (,) Int
1 []
          BS    _        -> (,) Int
0 []
          Other _        -> (,) Int
0 []
\end{code}


Scanning Regex Strings
----------------------

\begin{code}
-- | scan a RE string into a list of RE Token
scan :: String -> [Token]
scan :: String -> [Token]
scan = (Regex -> String -> Match String)
-> [(Regex, Match String -> Maybe Token)]
-> Token
-> String
-> [Token]
forall s re t.
Replace s =>
(re -> s -> Match s) -> [(re, Match s -> Maybe t)] -> t -> s -> [t]
alex' Regex -> String -> Match String
forall regex source target.
RegexContext regex source target =>
regex -> source -> target
match [(Regex, Match String -> Maybe Token)]
al (Token -> String -> [Token]) -> Token -> String -> [Token]
forall a b. (a -> b) -> a -> b
$ String -> Token
forall a. String -> a
oops String
"top"
  where
    al :: [(Regex,Match String->Maybe Token)]
    al :: [(Regex, Match String -> Maybe Token)]
al =
      [ String
-> (Match String -> Token) -> (Regex, Match String -> Maybe Token)
forall a compOpt execOpt source a a.
RegexMaker a compOpt execOpt source =>
source -> (a -> a) -> (a, a -> Maybe a)
mk String
"\\$\\{([^{}]+)\\}\\(" ((Match String -> Token) -> (Regex, Match String -> Maybe Token))
-> (Match String -> Token) -> (Regex, Match String -> Maybe Token)
forall a b. (a -> b) -> a -> b
$         Maybe String -> Token
ECap (Maybe String -> Token)
-> (Match String -> Maybe String) -> Match String -> Token
forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> Maybe String
forall a. a -> Maybe a
Just (String -> Maybe String)
-> (Match String -> String) -> Match String -> Maybe String
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Match String -> String
forall a. Match a -> a
x_1
      , String
-> (Match String -> Token) -> (Regex, Match String -> Maybe Token)
forall a compOpt execOpt source a a.
RegexMaker a compOpt execOpt source =>
source -> (a -> a) -> (a, a -> Maybe a)
mk String
"\\$\\("               ((Match String -> Token) -> (Regex, Match String -> Maybe Token))
-> (Match String -> Token) -> (Regex, Match String -> Maybe Token)
forall a b. (a -> b) -> a -> b
$ Token -> Match String -> Token
forall a b. a -> b -> a
const (Token -> Match String -> Token) -> Token -> Match String -> Token
forall a b. (a -> b) -> a -> b
$ Maybe String -> Token
ECap Maybe String
forall a. Maybe a
Nothing
      , String
-> (Match String -> Token) -> (Regex, Match String -> Maybe Token)
forall a compOpt execOpt source a a.
RegexMaker a compOpt execOpt source =>
source -> (a -> a) -> (a, a -> Maybe a)
mk String
"\\(\\?:"              ((Match String -> Token) -> (Regex, Match String -> Maybe Token))
-> (Match String -> Token) -> (Regex, Match String -> Maybe Token)
forall a b. (a -> b) -> a -> b
$ Token -> Match String -> Token
forall a b. a -> b -> a
const   Token
PGrp
      , String
-> (Match String -> Token) -> (Regex, Match String -> Maybe Token)
forall a compOpt execOpt source a a.
RegexMaker a compOpt execOpt source =>
source -> (a -> a) -> (a, a -> Maybe a)
mk String
"\\(\\?"               ((Match String -> Token) -> (Regex, Match String -> Maybe Token))
-> (Match String -> Token) -> (Regex, Match String -> Maybe Token)
forall a b. (a -> b) -> a -> b
$ Token -> Match String -> Token
forall a b. a -> b -> a
const   Token
PCap
      , String
-> (Match String -> Token) -> (Regex, Match String -> Maybe Token)
forall a compOpt execOpt source a a.
RegexMaker a compOpt execOpt source =>
source -> (a -> a) -> (a, a -> Maybe a)
mk String
"\\("                  ((Match String -> Token) -> (Regex, Match String -> Maybe Token))
-> (Match String -> Token) -> (Regex, Match String -> Maybe Token)
forall a b. (a -> b) -> a -> b
$ Token -> Match String -> Token
forall a b. a -> b -> a
const   Token
Bra
      , String
-> (Match String -> Token) -> (Regex, Match String -> Maybe Token)
forall a compOpt execOpt source a a.
RegexMaker a compOpt execOpt source =>
source -> (a -> a) -> (a, a -> Maybe a)
mk String
"\\\\(.)"              ((Match String -> Token) -> (Regex, Match String -> Maybe Token))
-> (Match String -> Token) -> (Regex, Match String -> Maybe Token)
forall a b. (a -> b) -> a -> b
$         Char -> Token
BS    (Char -> Token) -> (Match String -> Char) -> Match String -> Token
forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> Char
forall p. [p] -> p
s2c (String -> Char)
-> (Match String -> String) -> Match String -> Char
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Match String -> String
forall a. Match a -> a
x_1
      , String
-> (Match String -> Token) -> (Regex, Match String -> Maybe Token)
forall a compOpt execOpt source a a.
RegexMaker a compOpt execOpt source =>
source -> (a -> a) -> (a, a -> Maybe a)
mk String
"(.|\n)"               ((Match String -> Token) -> (Regex, Match String -> Maybe Token))
-> (Match String -> Token) -> (Regex, Match String -> Maybe Token)
forall a b. (a -> b) -> a -> b
$         Char -> Token
Other (Char -> Token) -> (Match String -> Char) -> Match String -> Token
forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> Char
forall p. [p] -> p
s2c (String -> Char)
-> (Match String -> String) -> Match String -> Char
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Match String -> String
forall a. Match a -> a
x_1
      ]

    x_1 :: Match a -> a
x_1     = CaptureID -> Match a -> a
forall a. CaptureID -> Match a -> a
captureText (CaptureID -> Match a -> a) -> CaptureID -> Match a -> a
forall a b. (a -> b) -> a -> b
$ CaptureOrdinal -> CaptureID
IsCaptureOrdinal (CaptureOrdinal -> CaptureID) -> CaptureOrdinal -> CaptureID
forall a b. (a -> b) -> a -> b
$ Int -> CaptureOrdinal
CaptureOrdinal Int
1

    s2c :: [p] -> p
s2c [p
c] = p
c
    s2c [p]
_   = String -> p
forall a. String -> a
oops String
"s2c"

    mk :: source -> (a -> a) -> (a, a -> Maybe a)
mk source
s a -> a
f  = ((String -> a) -> (a -> a) -> Poss a -> a
forall b a. (String -> b) -> (a -> b) -> Poss a -> b
poss String -> a
forall a. HasCallStack => String -> a
error a -> a
forall a. a -> a
id (Poss a -> a) -> Poss a -> a
forall a b. (a -> b) -> a -> b
$ source -> Poss a
forall regex compOpt execOpt source (m :: * -> *).
(RegexMaker regex compOpt execOpt source, MonadFail m) =>
source -> m regex
makeRegexM source
s,a -> Maybe a
forall a. a -> Maybe a
Just (a -> Maybe a) -> (a -> a) -> a -> Maybe a
forall b c a. (b -> c) -> (a -> b) -> a -> c
. a -> a
f)

    oops :: String -> a
oops  String
m = String -> a
forall a. HasCallStack => String -> a
error (String -> a) -> String -> a
forall a b. (a -> b) -> a -> b
$ String
"NamedCaptures.scan: " String -> ShowS
forall a. [a] -> [a] -> [a]
++ String
m
\end{code}


Parsing captures
----------------

\begin{code}
parse_capture :: String -> TH.Q TH.Exp
parse_capture :: String -> Q Exp
parse_capture String
s = case (Char -> Bool) -> String -> Bool
forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Bool
all Char -> Bool
isDigit String
s of
  Bool
True  -> [|IsCaptureOrdinal $ CaptureOrdinal $ read s|]
  Bool
False -> [|IsCaptureName    $ CaptureName $ T.pack  s|]
\end{code}


Formatting [Token]
------------------

\begin{code}
-- | format [Token] into an RE string
formatTokens :: [Token] -> String
formatTokens :: [Token] -> String
formatTokens = FormatTokenREOptions -> [Token] -> String
formatTokens' FormatTokenREOptions
defFormatTokenREOptions

-- | options for the general Token formatter below
data FormatTokenREOptions =
  FormatTokenREOptions
    { FormatTokenREOptions -> Maybe RegexType
_fto_regex_type :: Maybe RegexType    -- ^ Posix, PCRE or indeterminate REs?
    , FormatTokenREOptions -> Bool
_fto_min_caps   :: Bool               -- ^ remove captures where possible
    , FormatTokenREOptions -> Bool
_fto_incl_caps  :: Bool               -- ^ include the captures in the output
    }
  deriving (Int -> FormatTokenREOptions -> ShowS
[FormatTokenREOptions] -> ShowS
FormatTokenREOptions -> String
(Int -> FormatTokenREOptions -> ShowS)
-> (FormatTokenREOptions -> String)
-> ([FormatTokenREOptions] -> ShowS)
-> Show FormatTokenREOptions
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [FormatTokenREOptions] -> ShowS
$cshowList :: [FormatTokenREOptions] -> ShowS
show :: FormatTokenREOptions -> String
$cshow :: FormatTokenREOptions -> String
showsPrec :: Int -> FormatTokenREOptions -> ShowS
$cshowsPrec :: Int -> FormatTokenREOptions -> ShowS
Show)

-- | the default configuration for the Token formatter
defFormatTokenREOptions :: FormatTokenREOptions
defFormatTokenREOptions :: FormatTokenREOptions
defFormatTokenREOptions =
  FormatTokenREOptions :: Maybe RegexType -> Bool -> Bool -> FormatTokenREOptions
FormatTokenREOptions
    { _fto_regex_type :: Maybe RegexType
_fto_regex_type = Maybe RegexType
forall a. Maybe a
Nothing
    , _fto_min_caps :: Bool
_fto_min_caps   = Bool
False
    , _fto_incl_caps :: Bool
_fto_incl_caps  = Bool
False
    }

-- | a configuration that will preserve the parsed regular expression
-- in the output
idFormatTokenREOptions :: FormatTokenREOptions
idFormatTokenREOptions :: FormatTokenREOptions
idFormatTokenREOptions =
  FormatTokenREOptions :: Maybe RegexType -> Bool -> Bool -> FormatTokenREOptions
FormatTokenREOptions
    { _fto_regex_type :: Maybe RegexType
_fto_regex_type = Maybe RegexType
forall a. Maybe a
Nothing
    , _fto_min_caps :: Bool
_fto_min_caps   = Bool
False
    , _fto_incl_caps :: Bool
_fto_incl_caps  = Bool
True
    }

-- | the general Token formatter, generating REs according to the options
formatTokens' :: FormatTokenREOptions -> [Token] -> String
formatTokens' :: FormatTokenREOptions -> [Token] -> String
formatTokens' FormatTokenREOptions{Bool
Maybe RegexType
_fto_incl_caps :: Bool
_fto_min_caps :: Bool
_fto_regex_type :: Maybe RegexType
_fto_incl_caps :: FormatTokenREOptions -> Bool
_fto_min_caps :: FormatTokenREOptions -> Bool
_fto_regex_type :: FormatTokenREOptions -> Maybe RegexType
..} = (Token -> ShowS) -> String -> [Token] -> String
forall (t :: * -> *) a b.
Foldable t =>
(a -> b -> b) -> b -> t a -> b
foldr Token -> ShowS
f String
""
  where
    f :: Token -> ShowS
f Token
tk String
tl = String
t_s String -> ShowS
forall a. [a] -> [a] -> [a]
++ String
tl
      where
        t_s :: String
t_s = case Token
tk of
          ECap  mb -> Maybe String -> String
ecap Maybe String
mb
          Token
PGrp     -> if Bool -> (RegexType -> Bool) -> Maybe RegexType -> Bool
forall b a. b -> (a -> b) -> Maybe a -> b
maybe Bool
False RegexType -> Bool
isTDFA Maybe RegexType
_fto_regex_type then String
"(" else String
"(?:"
          Token
PCap     -> String
"(?"
          Token
Bra      -> Bool -> String
bra Bool
_fto_min_caps
          BS    c  -> String
"\\" String -> ShowS
forall a. [a] -> [a] -> [a]
++ [Char
c]
          Other c  -> [Char
c]

    ecap :: Maybe String -> String
ecap Maybe String
mb = case Bool
_fto_incl_caps of
      Bool
True  -> case Maybe String
mb of
        Maybe String
Nothing -> String
"$("
        Just String
nm -> String
"${"String -> ShowS
forall a. [a] -> [a] -> [a]
++String
nmString -> ShowS
forall a. [a] -> [a] -> [a]
++String
"}("
      Bool
False -> Bool -> String
bra Bool
_fto_min_caps

    bra :: Bool -> String
bra Bool
mc  = case Bool
mc Bool -> Bool -> Bool
&& Bool -> (RegexType -> Bool) -> Maybe RegexType -> Bool
forall b a. b -> (a -> b) -> Maybe a -> b
maybe Bool
False RegexType -> Bool
isPCRE Maybe RegexType
_fto_regex_type of
      Bool
True  -> String
"(?:"
      Bool
False -> String
"("
\end{code}

\begin{code}
-- this is a reference of formatTokens defFormatTokenREOptions,
-- used for testing the latter
formatTokens0 :: [Token] -> String
formatTokens0 :: [Token] -> String
formatTokens0 = (Token -> ShowS) -> String -> [Token] -> String
forall (t :: * -> *) a b.
Foldable t =>
(a -> b -> b) -> b -> t a -> b
foldr Token -> ShowS
f String
""
  where
    f :: Token -> ShowS
f Token
tk String
tl = String
t_s String -> ShowS
forall a. [a] -> [a] -> [a]
++ String
tl
      where
        t_s :: String
t_s = case Token
tk of
          ECap  _ -> String
"("
          Token
PGrp    -> String
"(?:"
          Token
PCap    -> String
"(?"
          Token
Bra     -> String
"("
          BS    c -> String
"\\" String -> ShowS
forall a. [a] -> [a] -> [a]
++ [Char
c]
          Other c -> [Char
c]
\end{code}