{-# LANGUAGE BangPatterns      #-}
{-# LANGUAGE LambdaCase        #-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE QuasiQuotes       #-}
{-# LANGUAGE StrictData        #-}
{-# LANGUAGE TemplateHaskell   #-}

{-|
Module      : Headroom.Data.Regex
Description : Helper functions for regular expressions
Copyright   : (c) 2019-2021 Vaclav Svejcar
License     : BSD-3-Clause
Maintainer  : vaclav.svejcar@gmail.com
Stability   : experimental
Portability : POSIX

Extends functionalify provided by "Text.Regex.PCRE.Light"
and "Text.Regex.PCRE.Heavy" that more suits the needs of this application.
-}

module Headroom.Data.Regex
  ( -- * Data Types
    Regex(..)
  , RegexError(..)
    -- * Regex Functions
  , compile
  , match
  , isMatch
  , re
  , replace
  , replaceFirst
  , scan
    -- * Unsafe Functions
  , compileUnsafe
  )
where

import           Data.Aeson                          ( FromJSON(..)
                                                     , Value(String)
                                                     )
import           Data.String.Interpolate             ( iii )
import           Headroom.Data.Coerce                ( coerce )
import           Headroom.Types                      ( fromHeadroomError
                                                     , toHeadroomError
                                                     )
import           Language.Haskell.TH.Quote           ( QuasiQuoter(..) )
import           RIO
import qualified RIO.Text                           as T
import qualified Text.Regex.PCRE.Heavy              as PH
import qualified Text.Regex.PCRE.Light              as PL
import qualified Text.Regex.PCRE.Light.Base         as PL
                                                     ( Regex(..) )
import qualified Text.Regex.PCRE.Light.Char8        as PLC


---------------------------------  DATA TYPES  ---------------------------------

-- | Represents compiled /regex/, encapsulates the actual implementation.
newtype Regex = Regex PL.Regex


instance Eq Regex where
  Regex (PL.Regex ForeignPtr PCRE
_ ByteString
r1) == :: Regex -> Regex -> Bool
== Regex (PL.Regex ForeignPtr PCRE
_ ByteString
r2) = ByteString
r1 ByteString -> ByteString -> Bool
forall a. Eq a => a -> a -> Bool
== ByteString
r2


instance Show Regex where
  show :: Regex -> String
show (Regex (PL.Regex ForeignPtr PCRE
_ ByteString
r)) = ByteString -> String
forall a. Show a => a -> String
show ByteString
r


instance FromJSON Regex where
  parseJSON :: Value -> Parser Regex
parseJSON (String Text
s) = Regex -> Parser Regex
forall (f :: * -> *) a. Applicative f => a -> f a
pure (Regex -> Parser Regex) -> (Text -> Regex) -> Text -> Parser Regex
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> Regex
compileUnsafe (Text -> Parser Regex) -> Text -> Parser Regex
forall a b. (a -> b) -> a -> b
$ Text
s
  parseJSON Value
val = String -> Parser Regex
forall a. HasCallStack => String -> a
error (String -> Parser Regex) -> String -> Parser Regex
forall a b. (a -> b) -> a -> b
$ String
"Invalid value: expected regex, found: " String -> ShowS
forall a. Semigroup a => a -> a -> a
<> Value -> String
forall a. Show a => a -> String
show Value
val


------------------------------  PUBLIC FUNCTIONS  ------------------------------


-- | Compiles given /regex/ in /runtime/. If possible, prefer the 're'
-- /quasi quotation/ version that does the same at /compile time/.
compile :: MonadThrow m
        => Text
        -- ^ /regex/ to compile
        -> m Regex
        -- ^ compiled regex
compile :: Text -> m Regex
compile Text
raw = (String -> m Regex)
-> (Regex -> m Regex) -> Either String Regex -> m Regex
forall a c b. (a -> c) -> (b -> c) -> Either a b -> c
either (RegexError -> m Regex
forall (m :: * -> *) e a. (MonadThrow m, Exception e) => e -> m a
throwM (RegexError -> m Regex)
-> (String -> RegexError) -> String -> m Regex
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> Text -> RegexError
CompilationFailed Text
raw (Text -> RegexError) -> (String -> Text) -> String -> RegexError
forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> Text
T.pack) Regex -> m Regex
forall (f :: * -> *) a. Applicative f => a -> f a
pure Either String Regex
compile'
  where compile' :: Either String Regex
compile' = Regex -> Regex
Regex (Regex -> Regex) -> Either String Regex -> Either String Regex
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> ByteString -> [PCREOption] -> Either String Regex
PH.compileM (Text -> ByteString
encodeUtf8 Text
raw) [PCREOption
PLC.utf8]


-- | Same as 'PLC.match', but works with 'Text' and uses no additional options.
match :: Regex
      -- ^ a PCRE regular expression value produced by compile
      -> Text
      -- ^ the subject text to match against
      -> Maybe [Text]
      -- ^ the result value
match :: Regex -> Text -> Maybe [Text]
match (Regex Regex
r) Text
subject = (String -> Text) -> [String] -> [Text]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap String -> Text
T.pack ([String] -> [Text]) -> Maybe [String] -> Maybe [Text]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Regex -> String -> [PCREExecOption] -> Maybe [String]
PLC.match Regex
r (Text -> String
T.unpack Text
subject) []


-- | Same as 'match', but instead of returning matched text it only indicates
-- whether the given text matches the pattern or not.
isMatch :: Regex
        -- ^ a PCRE regular expression value produced by compile
        -> Text
        -- ^ the subject text to match against
        -> Bool
        -- ^ the result value
isMatch :: Regex -> Text -> Bool
isMatch Regex
regex Text
subject = Maybe [Text] -> Bool
forall a. Maybe a -> Bool
isJust (Maybe [Text] -> Bool) -> Maybe [Text] -> Bool
forall a b. (a -> b) -> a -> b
$ Regex -> Text -> Maybe [Text]
match Regex
regex Text
subject


-- | A QuasiQuoter for regular expressions that does a compile time check.
re :: QuasiQuoter
re :: QuasiQuoter
re = QuasiQuoter :: (String -> Q Exp)
-> (String -> Q Pat)
-> (String -> Q Type)
-> (String -> Q [Dec])
-> QuasiQuoter
QuasiQuoter { quoteExp :: String -> Q Exp
quoteExp  = String -> Q Exp
quoteExpRegex
                 , quotePat :: String -> Q Pat
quotePat  = String -> Q Pat
forall a. HasCallStack => a
undefined
                 , quoteType :: String -> Q Type
quoteType = String -> Q Type
forall a. HasCallStack => a
undefined
                 , quoteDec :: String -> Q [Dec]
quoteDec  = String -> Q [Dec]
forall a. HasCallStack => a
undefined
                 }
 where
  quoteExpRegex :: String -> Q Exp
quoteExpRegex String
txt = [| compileUnsafe . T.pack $ txt |]
    where !Regex
_ = Text -> Regex
compileUnsafe (Text -> Regex) -> (String -> Text) -> String -> Regex
forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> Text
T.pack (String -> Regex) -> String -> Regex
forall a b. (a -> b) -> a -> b
$ String
txt -- check at compile time


-- | Replaces all occurences of given /regex/.
replace :: Regex
        -- ^ pattern to replace
        -> (Text -> [Text] -> Text)
        -- ^ replacement function (as @fullMatch -> [groups] -> result@)
        -> Text
        -- ^ text to replace in
        -> Text
        -- ^ resulting text
replace :: Regex -> (Text -> [Text] -> Text) -> Text -> Text
replace = Regex -> (Text -> [Text] -> Text) -> Text -> Text
forall a r.
(ConvertibleStrings ByteString a, ConvertibleStrings a ByteString,
 RegexReplacement r) =>
Regex -> r -> a -> a
PH.gsub (Regex -> (Text -> [Text] -> Text) -> Text -> Text)
-> (Regex -> Regex)
-> Regex
-> (Text -> [Text] -> Text)
-> Text
-> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Regex -> Regex
coerce


-- | Replaces only first occurence of given /regex/.
replaceFirst :: Regex
             -- ^ pattern to replace
             -> (Text -> [Text] -> Text)
             -- ^ replacement function (as @fullMatch -> [groups] -> result@)
             -> Text
             -- ^ text to replace in
             -> Text
             -- ^ resulting text
replaceFirst :: Regex -> (Text -> [Text] -> Text) -> Text -> Text
replaceFirst = Regex -> (Text -> [Text] -> Text) -> Text -> Text
forall a r.
(ConvertibleStrings ByteString a, ConvertibleStrings a ByteString,
 RegexReplacement r) =>
Regex -> r -> a -> a
PH.sub (Regex -> (Text -> [Text] -> Text) -> Text -> Text)
-> (Regex -> Regex)
-> Regex
-> (Text -> [Text] -> Text)
-> Text
-> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Regex -> Regex
coerce


-- | Searches the text for all occurences of given /regex/.
scan :: Regex
     -- ^ /regex/ to search for
     -> Text
     -- ^ input text
     -> [(Text, [Text])]
     -- ^ found occurences (as @[(fullMatch, [groups])]@)
scan :: Regex -> Text -> [(Text, [Text])]
scan = Regex -> Text -> [(Text, [Text])]
forall a.
(ConvertibleStrings ByteString a,
 ConvertibleStrings a ByteString) =>
Regex -> a -> [(a, [a])]
PH.scan (Regex -> Text -> [(Text, [Text])])
-> (Regex -> Regex) -> Regex -> Text -> [(Text, [Text])]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Regex -> Regex
coerce


-- | Compiles the given text into /regex/ in runtime. Note that if the /regex/
-- cannot be compiled, it will throw runtime error. Do not use this function
-- unless you know what you're doing.
compileUnsafe :: Text
              -- ^ /regex/ to compile
              -> Regex
              -- ^ compiled /regex/ or runtime exception
compileUnsafe :: Text -> Regex
compileUnsafe Text
raw = case Text -> Either SomeException Regex
forall (m :: * -> *). MonadThrow m => Text -> m Regex
compile Text
raw of
  Left  SomeException
err -> String -> Regex
forall a. HasCallStack => String -> a
error (String -> Regex)
-> (SomeException -> String) -> SomeException -> Regex
forall b c a. (b -> c) -> (a -> b) -> a -> c
. SomeException -> String
forall e. Exception e => e -> String
displayException (SomeException -> Regex) -> SomeException -> Regex
forall a b. (a -> b) -> a -> b
$ SomeException
err
  Right Regex
res -> Regex
res


---------------------------------  ERROR TYPES  --------------------------------

-- | Exception specific to the "Headroom.Data.Regex" module.
data RegexError = CompilationFailed Text Text
                -- ^ given input cannot be compiled as /regex/
  deriving (Int -> RegexError -> ShowS
[RegexError] -> ShowS
RegexError -> String
(Int -> RegexError -> ShowS)
-> (RegexError -> String)
-> ([RegexError] -> ShowS)
-> Show RegexError
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [RegexError] -> ShowS
$cshowList :: [RegexError] -> ShowS
show :: RegexError -> String
$cshow :: RegexError -> String
showsPrec :: Int -> RegexError -> ShowS
$cshowsPrec :: Int -> RegexError -> ShowS
Show, Typeable)

instance Exception RegexError where
  displayException :: RegexError -> String
displayException = RegexError -> String
displayException'
  toException :: RegexError -> SomeException
toException      = RegexError -> SomeException
forall e. Exception e => e -> SomeException
toHeadroomError
  fromException :: SomeException -> Maybe RegexError
fromException    = SomeException -> Maybe RegexError
forall e. Exception e => SomeException -> Maybe e
fromHeadroomError

displayException' :: RegexError -> String
displayException' :: RegexError -> String
displayException' = \case
  CompilationFailed Text
raw Text
reason -> [iii|
      Cannot compile regex from input '#{raw}', reason: #{reason}
    |]