{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE QuasiQuotes #-}
{-# LANGUAGE TemplateHaskell #-}

module Text.Regex.Pcre2.TH where

import           Data.IORef
import           Data.List.NonEmpty         (NonEmpty)
import qualified Data.List.NonEmpty         as NE
import           Data.Map.Lazy              (Map)
import qualified Data.Map.Lazy              as Map
import           Data.Text                  (Text)
import qualified Data.Text                  as Text
import           Language.Haskell.TH
import           Language.Haskell.TH.Quote
import           Language.Haskell.TH.Syntax
import           System.IO.Unsafe           (unsafePerformIO)
import           Text.Regex.Pcre2.Internal

-- | Unexported, top-level `IORef` that\'s created upon the first runtime
-- evaluation of a Template Haskell `Matcher`.
globalMatcherCache :: IORef (Map Text Matcher)
globalMatcherCache :: IORef (Map Text Matcher)
globalMatcherCache = IO (IORef (Map Text Matcher)) -> IORef (Map Text Matcher)
forall a. IO a -> a
unsafePerformIO (IO (IORef (Map Text Matcher)) -> IORef (Map Text Matcher))
-> IO (IORef (Map Text Matcher)) -> IORef (Map Text Matcher)
forall a b. (a -> b) -> a -> b
$ Map Text Matcher -> IO (IORef (Map Text Matcher))
forall a. a -> IO (IORef a)
newIORef Map Text Matcher
forall k a. Map k a
Map.empty
{-# NOINLINE globalMatcherCache #-}

-- | Given a `Text`, create or retrieve a `Matcher` from the global cache.
memoMatcher :: Text -> Matcher
memoMatcher :: Text -> Matcher
memoMatcher Text
patt = IO Matcher -> Matcher
forall a. IO a -> a
unsafePerformIO (IO Matcher -> Matcher) -> IO Matcher -> Matcher
forall a b. (a -> b) -> a -> b
$ do
    Map Text Matcher
cache <- IORef (Map Text Matcher) -> IO (Map Text Matcher)
forall a. IORef a -> IO a
readIORef IORef (Map Text Matcher)
globalMatcherCache
    case Text -> Map Text Matcher -> Maybe Matcher
forall k a. Ord k => k -> Map k a -> Maybe a
Map.lookup Text
patt Map Text Matcher
cache of
        Just Matcher
matcher -> Matcher -> IO Matcher
forall (m :: * -> *) a. Monad m => a -> m a
return Matcher
matcher
        Maybe Matcher
Nothing      -> do
            let matcher :: Matcher
matcher = IO Matcher -> Matcher
forall a. IO a -> a
unsafePerformIO (IO Matcher -> Matcher) -> IO Matcher -> Matcher
forall a b. (a -> b) -> a -> b
$ Option -> Text -> IO Matcher
assembleMatcher Option
forall a. Monoid a => a
mempty Text
patt
            IORef (Map Text Matcher)
-> (Map Text Matcher -> (Map Text Matcher, ())) -> IO ()
forall a b. IORef a -> (a -> (a, b)) -> IO b
atomicModifyIORef' IORef (Map Text Matcher)
globalMatcherCache ((Map Text Matcher -> (Map Text Matcher, ())) -> IO ())
-> (Map Text Matcher -> (Map Text Matcher, ())) -> IO ()
forall a b. (a -> b) -> a -> b
$ \Map Text Matcher
cache ->
                (Text -> Matcher -> Map Text Matcher -> Map Text Matcher
forall k a. Ord k => k -> a -> Map k a -> Map k a
Map.insert Text
patt Matcher
matcher Map Text Matcher
cache, ())
            Matcher -> IO Matcher
forall (m :: * -> *) a. Monad m => a -> m a
return Matcher
matcher

-- | Generate code to produce \(and memoize\) a `Matcher` from a pattern.
matcherQ :: String -> ExpQ
matcherQ :: String -> ExpQ
matcherQ String
s = [e| memoMatcher $ Text.pack $(stringE s) |]

-- | Predict parenthesized captures \(maybe named\) of a pattern at splice time.
predictCaptureNamesQ :: String -> Q [Maybe Text]
predictCaptureNamesQ :: String -> Q [Maybe Text]
predictCaptureNamesQ = IO [Maybe Text] -> Q [Maybe Text]
forall a. IO a -> Q a
runIO (IO [Maybe Text] -> Q [Maybe Text])
-> (String -> IO [Maybe Text]) -> String -> Q [Maybe Text]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Option -> Text -> IO [Maybe Text]
predictCaptureNames Option
forall a. Monoid a => a
mempty (Text -> IO [Maybe Text])
-> (String -> Text) -> String -> IO [Maybe Text]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> Text
Text.pack

-- | Get the indexes of `Just` the named captures.
toKVs :: [Maybe Text] -> [(Int, Text)]
toKVs :: [Maybe Text] -> [(Int, Text)]
toKVs [Maybe Text]
names = [(Int
number, Text
name) | (Int
number, Just Text
name) <- [Int] -> [Maybe Text] -> [(Int, Maybe Text)]
forall a b. [a] -> [b] -> [(a, b)]
zip [Int
1 ..] [Maybe Text]
names]

-- | Generate the data-kinded phantom type parameter of `Captures` of a pattern,
-- if needed.
capturesInfoQ :: String -> Q (Maybe Type)
capturesInfoQ :: String -> Q (Maybe Type)
capturesInfoQ String
s = String -> Q [Maybe Text]
predictCaptureNamesQ String
s Q [Maybe Text]
-> ([Maybe Text] -> Q (Maybe Type)) -> Q (Maybe Type)
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= \case
    -- No parenthesized captures, so need for Captures, so no info.
    [] -> Maybe Type -> Q (Maybe Type)
forall (m :: * -> *) a. Monad m => a -> m a
return Maybe Type
forall a. Maybe a
Nothing

    -- One or more parenthesized captures.  Present
    --     [Just "foo", Just "bar", Nothing]
    -- as
    --     '(3, '[ '("foo", 1), '("bar", 2)]).
    [Maybe Text]
captureNames -> Type -> Maybe Type
forall a. a -> Maybe a
Just (Type -> Maybe Type) -> Q Type -> Q (Maybe Type)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Int -> Q Type
promotedTupleT Int
2 Q Type -> Q Type -> Q Type
`appT` Q Type
hi Q Type -> Q Type -> Q Type
`appT` Q Type
kvs where
        -- 3
        hi :: Q Type
hi = TyLitQ -> Q Type
litT (TyLitQ -> Q Type) -> TyLitQ -> Q Type
forall a b. (a -> b) -> a -> b
$ Integer -> TyLitQ
numTyLit (Integer -> TyLitQ) -> Integer -> TyLitQ
forall a b. (a -> b) -> a -> b
$ Int -> Integer
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Int -> Integer) -> Int -> Integer
forall a b. (a -> b) -> a -> b
$ [Maybe Text] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length [Maybe Text]
captureNames
        -- '[ '("foo", 1), '("bar", 2)]
        kvs :: Q Type
kvs = ((Int, Text) -> Q Type -> Q Type)
-> Q Type -> [(Int, Text)] -> Q Type
forall (t :: * -> *) a b.
Foldable t =>
(a -> b -> b) -> b -> t a -> b
foldr (Int, Text) -> Q Type -> Q Type
forall a. Integral a => (a, Text) -> Q Type -> Q Type
f Q Type
promotedNilT ([(Int, Text)] -> Q Type) -> [(Int, Text)] -> Q Type
forall a b. (a -> b) -> a -> b
$ [Maybe Text] -> [(Int, Text)]
toKVs [Maybe Text]
captureNames where
            -- '("foo", 1) ': ...
            f :: (a, Text) -> Q Type -> Q Type
f (a
number, Text
name) = Q Type -> Q Type -> Q Type
appT (Q Type -> Q Type -> Q Type) -> Q Type -> Q Type -> Q Type
forall a b. (a -> b) -> a -> b
$ Q Type -> Q Type -> Q Type
appT Q Type
promotedConsT (Q Type -> Q Type) -> Q Type -> Q Type
forall a b. (a -> b) -> a -> b
$       -- ':
                Int -> Q Type
promotedTupleT Int
2                                 -- '(,)
                    Q Type -> Q Type -> Q Type
`appT` TyLitQ -> Q Type
litT (String -> TyLitQ
strTyLit (String -> TyLitQ) -> String -> TyLitQ
forall a b. (a -> b) -> a -> b
$ Text -> String
Text.unpack Text
name)    -- "foo"
                    Q Type -> Q Type -> Q Type
`appT` TyLitQ -> Q Type
litT (Integer -> TyLitQ
numTyLit (Integer -> TyLitQ) -> Integer -> TyLitQ
forall a b. (a -> b) -> a -> b
$ a -> Integer
forall a b. (Integral a, Num b) => a -> b
fromIntegral a
number) -- 1

-- | /__As an expression__/
--
-- > regex :: (Alternative f) => String -> Text -> f (Captures info)
--
-- in the presence of parenthesized captures, or
--
-- > regex :: (Alternative f) => String -> Text -> f Text
--
-- if there are none.  In other words, if there is more than the 0th capture,
-- this behaves like `capturesA` (except returning an opaque `Captures` instead
-- of a list), otherwise it behaves like `match`.
--
-- To retrieve an individual capture from a `Captures`, use `capture`.
--
-- > case [regex|(?<y>\d{4})-(?<m>\d{2})-(?<d>\d{2})|] "submitted 2020-10-20" of
-- >     Just cs ->
-- >         let date = capture @0 cs
-- >             year = read @Int $ Text.unpack $ capture @"y" cs
-- >             ...
--
-- > forM_ ([regex|\s+$|] line :: Maybe Text) $ \spaces -> error $
-- >     "line has trailing spaces (" ++ show (Text.length spaces) ++ " characters)"
--
-- /__As a pattern__/
--
-- This matches when the regex first matches, whereupon any named captures are
-- bound to variables of the same names.
--
-- > case "submitted 2020-10-20" of
-- >     [regex|(?<y>\d{4})-(?<m>\d{2})-(?<d>\d{2})|] ->
-- >         let year = read @Int $ Text.unpack y
-- >             ...
--
-- Note that it is not possible to access the 0th capture this way.  As a
-- workaround, explicitly capture the whole pattern and name it.
--
-- If there are no named captures, this simply acts as a guard.
regex :: QuasiQuoter
regex :: QuasiQuoter
regex = QuasiQuoter :: (String -> ExpQ)
-> (String -> Q Pat)
-> (String -> Q Type)
-> (String -> Q [Dec])
-> QuasiQuoter
QuasiQuoter {
    quoteExp :: String -> ExpQ
quoteExp = \String
s -> do
        let fromCsQ :: ExpQ
fromCsQ = String -> Q (Maybe Type)
capturesInfoQ String
s Q (Maybe Type) -> (Maybe Type -> ExpQ) -> ExpQ
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= ExpQ -> (Type -> ExpQ) -> Maybe Type -> ExpQ
forall b a. b -> (a -> b) -> Maybe a -> b
maybe [e| _headNE |] Type -> ExpQ
toWrapQ
            toWrapQ :: Type -> ExpQ
toWrapQ Type
info = [e|
                let wrap cs = Captures cs :: Captures $(return info)
                in to wrap |]

        [e| toAlternativeOf1 $
            _capturesInternal $(matcherQ s) getAllSliceRanges . $(fromCsQ) |],

    quotePat :: String -> Q Pat
quotePat = \String
s -> do
        [Maybe Text]
captureNames <- String -> Q [Maybe Text]
predictCaptureNamesQ String
s

        case [(Int, Text)] -> Maybe (NonEmpty (Int, Text))
forall a. [a] -> Maybe (NonEmpty a)
NE.nonEmpty ([(Int, Text)] -> Maybe (NonEmpty (Int, Text)))
-> [(Int, Text)] -> Maybe (NonEmpty (Int, Text))
forall a b. (a -> b) -> a -> b
$ [Maybe Text] -> [(Int, Text)]
toKVs [Maybe Text]
captureNames of
            -- No named captures.  Test whether the string matches without
            -- creating any new Text values.
            Maybe (NonEmpty (Int, Text))
Nothing -> ExpQ -> Q Pat -> Q Pat
viewP
                [e| has $ _capturesInternal $(matcherQ s) errorFromMatch |]
                [p| True |]

            -- One or more named captures.  Attempt to bind only those to local
            -- variables of the same names.
            Just NonEmpty (Int, Text)
numberedNames -> ExpQ -> Q Pat -> Q Pat
viewP ExpQ
e Q Pat
p where
                (NonEmpty Int
numbers, NonEmpty Text
names) = NonEmpty (Int, Text) -> (NonEmpty Int, NonEmpty Text)
forall (f :: * -> *) a b. Functor f => f (a, b) -> (f a, f b)
NE.unzip NonEmpty (Int, Text)
numberedNames
                e :: ExpQ
e = [e|
                    let _cs = _capturesInternal
                            $(matcherQ s)
                            (getWhitelistedSliceRanges $(liftData numbers))
                    in view $ _cs . to NE.toList |]
                p :: Q Pat
p = (Text -> Q Pat -> Q Pat) -> Q Pat -> NonEmpty Text -> Q Pat
forall (t :: * -> *) a b.
Foldable t =>
(a -> b -> b) -> b -> t a -> b
foldr Text -> Q Pat -> Q Pat
f Q Pat
wildP NonEmpty Text
names where
                    f :: Text -> Q Pat -> Q Pat
f Text
name Q Pat
r = Name -> [Q Pat] -> Q Pat
conP '(:) [Name -> Q Pat
varP (Name -> Q Pat) -> Name -> Q Pat
forall a b. (a -> b) -> a -> b
$ String -> Name
mkName (String -> Name) -> String -> Name
forall a b. (a -> b) -> a -> b
$ Text -> String
Text.unpack Text
name, Q Pat
r],

    quoteType :: String -> Q Type
quoteType = Q Type -> String -> Q Type
forall a b. a -> b -> a
const (Q Type -> String -> Q Type) -> Q Type -> String -> Q Type
forall a b. (a -> b) -> a -> b
$ String -> Q Type
forall (m :: * -> *) a. MonadFail m => String -> m a
fail String
"regex: cannot produce a type",

    quoteDec :: String -> Q [Dec]
quoteDec = Q [Dec] -> String -> Q [Dec]
forall a b. a -> b -> a
const (Q [Dec] -> String -> Q [Dec]) -> Q [Dec] -> String -> Q [Dec]
forall a b. (a -> b) -> a -> b
$ String -> Q [Dec]
forall (m :: * -> *) a. MonadFail m => String -> m a
fail String
"regex: cannot produce declarations"}

-- | A global, optical variant of `regex`.  Can only be used as an expression.
--
-- > _regex :: String -> Traversal' Text (Captures info)
-- > _regex :: String -> Traversal' Text Text
--
-- > import Control.Lens
-- > import Data.Text.Lens
-- >
-- > embeddedNumber :: Traversal' String Int
-- > embeddedNumber = packed . [_regex|\d+|] . unpacked . _Show
-- >
-- > main :: IO ()
-- > main = putStrLn $ "There are 14 competing standards" & embeddedNumber %~ (+ 1)
-- >
-- > -- There are 15 competing standards
--
_regex :: QuasiQuoter
_regex :: QuasiQuoter
_regex = QuasiQuoter :: (String -> ExpQ)
-> (String -> Q Pat)
-> (String -> Q Type)
-> (String -> Q [Dec])
-> QuasiQuoter
QuasiQuoter {
    quoteExp :: String -> ExpQ
quoteExp = \String
s -> do
        let fromCsQ :: ExpQ
fromCsQ = String -> Q (Maybe Type)
capturesInfoQ String
s Q (Maybe Type) -> (Maybe Type -> ExpQ) -> ExpQ
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= ExpQ -> (Type -> ExpQ) -> Maybe Type -> ExpQ
forall b a. b -> (a -> b) -> Maybe a -> b
maybe [e| _headNE |] Type -> ExpQ
wrappedQ
            wrappedQ :: Type -> ExpQ
wrappedQ Type
info = [e|
                let wrapped :: Lens' (NonEmpty Text) (Captures $(return info))
                    wrapped f cs = f (Captures cs) <&> \(Captures cs') -> cs'
                in wrapped |]

        [e| _capturesInternal $(matcherQ s) getAllSliceRanges . $(fromCsQ) |],

    quotePat :: String -> Q Pat
quotePat = Q Pat -> String -> Q Pat
forall a b. a -> b -> a
const (Q Pat -> String -> Q Pat) -> Q Pat -> String -> Q Pat
forall a b. (a -> b) -> a -> b
$ String -> Q Pat
forall (m :: * -> *) a. MonadFail m => String -> m a
fail String
"_regex: cannot produce a pattern",

    quoteType :: String -> Q Type
quoteType = Q Type -> String -> Q Type
forall a b. a -> b -> a
const (Q Type -> String -> Q Type) -> Q Type -> String -> Q Type
forall a b. (a -> b) -> a -> b
$ String -> Q Type
forall (m :: * -> *) a. MonadFail m => String -> m a
fail String
"_regex: cannot produce a type",

    quoteDec :: String -> Q [Dec]
quoteDec = Q [Dec] -> String -> Q [Dec]
forall a b. a -> b -> a
const (Q [Dec] -> String -> Q [Dec]) -> Q [Dec] -> String -> Q [Dec]
forall a b. (a -> b) -> a -> b
$ String -> Q [Dec]
forall (m :: * -> *) a. MonadFail m => String -> m a
fail String
"_regex: cannot produce declarations"}