{-# OPTIONS_GHC -fno-warn-orphans          #-}

-- |
-- Copyright: (c) 2006-2017, Chris Dornan and Christopher Kuklewicz
-- SPDX-License-Identifier: BSD-3-Clause
--
-- This exports instances of the high level API and the medium level API of 'compile','execute', and 'regexec'.
--
-- @since 0.95.0
module Text.Regex.PCRE.Text
  (
  -- * Types
    Regex
  , MatchOffset
  , MatchLength
  , CompOption(CompOption)
  , ExecOption(ExecOption)
  , ReturnCode
  , WrapError
  -- * Miscellaneous
  , unusedOffset
  , getVersion
  -- * Medium level API functions
  , compile
  , execute
  , regexec
  -- * CompOption flags
  , compBlank
  , compAnchored
  , compAutoCallout
  , compCaseless
  , compDollarEndOnly
  , compDotAll
  , compExtended
  , compExtra
  , compFirstLine
  , compMultiline
  , compNoAutoCapture
  , compUngreedy
  , compUTF8
  , compNoUTF8Check
  -- * ExecOption flags
  , execBlank
  , execAnchored
  , execNotBOL
  , execNotEOL
  , execNotEmpty
  , execNoUTF8Check
  , execPartial
  ) where

import           Data.Array(Array,listArray)
import           Data.Char(ord)
import           Control.Monad.Fail (fail)
import           Prelude hiding (fail)
import qualified Data.ByteString              as B
import qualified Data.ByteString.Unsafe       as B
import qualified Data.Text                    as T
import qualified Data.Text.Encoding           as T
import           Foreign.C.String(CStringLen,CString)
import           Foreign(nullPtr)
import           System.IO.Unsafe (unsafePerformIO)
import           Text.Regex.Base.Impl
import           Text.Regex.Base.RegexLike
import           Text.Regex.PCRE.Wrap


instance RegexContext Regex T.Text T.Text where
  match :: Regex -> Text -> Text
match   = Regex -> Text -> Text
forall a b. RegexLike a b => a -> b -> b
polymatch
  matchM :: Regex -> Text -> m Text
matchM  = Regex -> Text -> m Text
forall a b (m :: * -> *).
(RegexLike a b, MonadFail m) =>
a -> b -> m b
polymatchM

instance RegexMaker Regex CompOption ExecOption T.Text where
  makeRegexOpts :: CompOption -> ExecOption -> Text -> Regex
makeRegexOpts CompOption
c ExecOption
e Text
pat = IO Regex -> Regex
forall a. IO a -> a
unsafePerformIO (IO Regex -> Regex) -> IO Regex -> Regex
forall a b. (a -> b) -> a -> b
$
    CompOption
-> ExecOption -> Text -> IO (Either (MatchOffset, String) Regex)
compile CompOption
c ExecOption
e Text
pat IO (Either (MatchOffset, String) Regex)
-> (Either (MatchOffset, String) Regex -> IO Regex) -> IO Regex
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= Either (MatchOffset, String) Regex -> IO Regex
forall e v. Show e => Either e v -> IO v
unwrap

  makeRegexOptsM :: CompOption -> ExecOption -> Text -> m Regex
makeRegexOptsM CompOption
c ExecOption
e Text
pat = ((MatchOffset, String) -> m Regex)
-> (Regex -> m Regex)
-> Either (MatchOffset, String) Regex
-> m Regex
forall a c b. (a -> c) -> (b -> c) -> Either a b -> c
either (String -> m Regex
forall (m :: * -> *) a. MonadFail m => String -> m a
fail(String -> m Regex)
-> ((MatchOffset, String) -> String)
-> (MatchOffset, String)
-> m Regex
forall b c a. (b -> c) -> (a -> b) -> a -> c
.(MatchOffset, String) -> String
forall a. Show a => a -> String
show) Regex -> m Regex
forall (m :: * -> *) a. Monad m => a -> m a
return (Either (MatchOffset, String) Regex -> m Regex)
-> Either (MatchOffset, String) Regex -> m Regex
forall a b. (a -> b) -> a -> b
$ IO (Either (MatchOffset, String) Regex)
-> Either (MatchOffset, String) Regex
forall a. IO a -> a
unsafePerformIO (IO (Either (MatchOffset, String) Regex)
 -> Either (MatchOffset, String) Regex)
-> IO (Either (MatchOffset, String) Regex)
-> Either (MatchOffset, String) Regex
forall a b. (a -> b) -> a -> b
$
    CompOption
-> ExecOption -> Text -> IO (Either (MatchOffset, String) Regex)
compile CompOption
c ExecOption
e Text
pat

instance RegexLike Regex T.Text where
  matchTest :: Regex -> Text -> Bool
matchTest Regex
re Text
tx = IO Bool -> Bool
forall a. IO a -> a
unsafePerformIO (IO Bool -> Bool) -> IO Bool -> Bool
forall a b. (a -> b) -> a -> b
$
    Text
-> (CStringLen -> IO (Either WrapError Bool))
-> IO (Either WrapError Bool)
forall a. Text -> (CStringLen -> IO a) -> IO a
asCStringLen Text
tx (MatchOffset -> Regex -> CStringLen -> IO (Either WrapError Bool)
wrapTest MatchOffset
0 Regex
re) IO (Either WrapError Bool)
-> (Either WrapError Bool -> IO Bool) -> IO Bool
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= Either WrapError Bool -> IO Bool
forall e v. Show e => Either e v -> IO v
unwrap

  matchOnce :: Regex -> Text -> Maybe MatchArray
matchOnce Regex
re Text
tx = IO (Maybe MatchArray) -> Maybe MatchArray
forall a. IO a -> a
unsafePerformIO (IO (Maybe MatchArray) -> Maybe MatchArray)
-> IO (Maybe MatchArray) -> Maybe MatchArray
forall a b. (a -> b) -> a -> b
$
    Regex -> Text -> IO (Either WrapError (Maybe MatchArray))
execute Regex
re Text
tx IO (Either WrapError (Maybe MatchArray))
-> (Either WrapError (Maybe MatchArray) -> IO (Maybe MatchArray))
-> IO (Maybe MatchArray)
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= Either WrapError (Maybe MatchArray) -> IO (Maybe MatchArray)
forall e v. Show e => Either e v -> IO v
unwrap

  matchAll :: Regex -> Text -> [MatchArray]
matchAll Regex
re Text
tx = IO [MatchArray] -> [MatchArray]
forall a. IO a -> a
unsafePerformIO (IO [MatchArray] -> [MatchArray])
-> IO [MatchArray] -> [MatchArray]
forall a b. (a -> b) -> a -> b
$
    Text
-> (CStringLen -> IO (Either WrapError [MatchArray]))
-> IO (Either WrapError [MatchArray])
forall a. Text -> (CStringLen -> IO a) -> IO a
asCStringLen Text
tx (Regex -> CStringLen -> IO (Either WrapError [MatchArray])
wrapMatchAll Regex
re) IO (Either WrapError [MatchArray])
-> (Either WrapError [MatchArray] -> IO [MatchArray])
-> IO [MatchArray]
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= Either WrapError [MatchArray] -> IO [MatchArray]
forall e v. Show e => Either e v -> IO v
unwrap

  matchCount :: Regex -> Text -> MatchOffset
matchCount Regex
re Text
tx = IO MatchOffset -> MatchOffset
forall a. IO a -> a
unsafePerformIO (IO MatchOffset -> MatchOffset) -> IO MatchOffset -> MatchOffset
forall a b. (a -> b) -> a -> b
$
    Text
-> (CStringLen -> IO (Either WrapError MatchOffset))
-> IO (Either WrapError MatchOffset)
forall a. Text -> (CStringLen -> IO a) -> IO a
asCStringLen Text
tx (Regex -> CStringLen -> IO (Either WrapError MatchOffset)
wrapCount Regex
re) IO (Either WrapError MatchOffset)
-> (Either WrapError MatchOffset -> IO MatchOffset)
-> IO MatchOffset
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= Either WrapError MatchOffset -> IO MatchOffset
forall e v. Show e => Either e v -> IO v
unwrap


-- ---------------------------------------------------------------------
-- | Compiles a regular expression
compile :: CompOption   -- ^ (summed together)
        -> ExecOption   -- ^ (summed together)
        -> T.Text       -- ^ The regular expression to compile
        -> IO (Either (MatchOffset,String) Regex) -- ^ Returns: the compiled regular expression
compile :: CompOption
-> ExecOption -> Text -> IO (Either (MatchOffset, String) Regex)
compile CompOption
c ExecOption
e Text
pat =
  -- PCRE does not allow one to specify a length for the regular expression, it must by 0 terminated
  Text
-> (CString -> IO (Either (MatchOffset, String) Regex))
-> IO (Either (MatchOffset, String) Regex)
forall a. Text -> (CString -> IO a) -> IO a
asCString Text
pat ((CString -> IO (Either (MatchOffset, String) Regex))
 -> IO (Either (MatchOffset, String) Regex))
-> (CString -> IO (Either (MatchOffset, String) Regex))
-> IO (Either (MatchOffset, String) Regex)
forall a b. (a -> b) -> a -> b
$ CompOption
-> ExecOption -> CString -> IO (Either (MatchOffset, String) Regex)
wrapCompile CompOption
c ExecOption
e


-- ---------------------------------------------------------------------
-- | Matches a regular expression against a string
execute :: Regex      -- ^ Compiled regular expression
        -> T.Text     -- ^ Text to match against
        -> IO (Either WrapError (Maybe (Array Int (MatchOffset,MatchLength))))
                -- ^ Returns: 'Nothing' if the regex did not match the
                -- string, or 'Just' an array of (offset,length) pairs
                -- where index 0 is whole match, and the rest are the
                -- captured subexpressions.
execute :: Regex -> Text -> IO (Either WrapError (Maybe MatchArray))
execute Regex
re Text
tx = do
  Either WrapError (Maybe [(MatchOffset, MatchOffset)])
maybeStartEnd <- Text
-> (CStringLen
    -> IO (Either WrapError (Maybe [(MatchOffset, MatchOffset)])))
-> IO (Either WrapError (Maybe [(MatchOffset, MatchOffset)]))
forall a. Text -> (CStringLen -> IO a) -> IO a
asCStringLen Text
tx (MatchOffset
-> Regex
-> CStringLen
-> IO (Either WrapError (Maybe [(MatchOffset, MatchOffset)]))
wrapMatch MatchOffset
0 Regex
re)
  case Either WrapError (Maybe [(MatchOffset, MatchOffset)])
maybeStartEnd of
    Right Maybe [(MatchOffset, MatchOffset)]
Nothing      -> Either WrapError (Maybe MatchArray)
-> IO (Either WrapError (Maybe MatchArray))
forall (m :: * -> *) a. Monad m => a -> m a
return (Either WrapError (Maybe MatchArray)
 -> IO (Either WrapError (Maybe MatchArray)))
-> Either WrapError (Maybe MatchArray)
-> IO (Either WrapError (Maybe MatchArray))
forall a b. (a -> b) -> a -> b
$ Maybe MatchArray -> Either WrapError (Maybe MatchArray)
forall a b. b -> Either a b
Right Maybe MatchArray
forall a. Maybe a
Nothing
    Right (Just [(MatchOffset, MatchOffset)]
parts) ->
      Either WrapError (Maybe MatchArray)
-> IO (Either WrapError (Maybe MatchArray))
forall (m :: * -> *) a. Monad m => a -> m a
return (Either WrapError (Maybe MatchArray)
 -> IO (Either WrapError (Maybe MatchArray)))
-> Either WrapError (Maybe MatchArray)
-> IO (Either WrapError (Maybe MatchArray))
forall a b. (a -> b) -> a -> b
$ Maybe MatchArray -> Either WrapError (Maybe MatchArray)
forall a b. b -> Either a b
Right (Maybe MatchArray -> Either WrapError (Maybe MatchArray))
-> Maybe MatchArray -> Either WrapError (Maybe MatchArray)
forall a b. (a -> b) -> a -> b
$ MatchArray -> Maybe MatchArray
forall a. a -> Maybe a
Just (MatchArray -> Maybe MatchArray) -> MatchArray -> Maybe MatchArray
forall a b. (a -> b) -> a -> b
$ (MatchOffset, MatchOffset)
-> [(MatchOffset, MatchOffset)] -> MatchArray
forall i e. Ix i => (i, i) -> [e] -> Array i e
listArray (MatchOffset
0,MatchOffset -> MatchOffset
forall a. Enum a => a -> a
pred (MatchOffset -> MatchOffset) -> MatchOffset -> MatchOffset
forall a b. (a -> b) -> a -> b
$ [(MatchOffset, MatchOffset)] -> MatchOffset
forall (t :: * -> *) a. Foldable t => t a -> MatchOffset
length [(MatchOffset, MatchOffset)]
parts)
          [ (MatchOffset
s,MatchOffset
eMatchOffset -> MatchOffset -> MatchOffset
forall a. Num a => a -> a -> a
-MatchOffset
s) | (MatchOffset
s,MatchOffset
e) <- [(MatchOffset, MatchOffset)]
parts ]
    Left WrapError
err           -> Either WrapError (Maybe MatchArray)
-> IO (Either WrapError (Maybe MatchArray))
forall (m :: * -> *) a. Monad m => a -> m a
return (Either WrapError (Maybe MatchArray)
 -> IO (Either WrapError (Maybe MatchArray)))
-> Either WrapError (Maybe MatchArray)
-> IO (Either WrapError (Maybe MatchArray))
forall a b. (a -> b) -> a -> b
$ WrapError -> Either WrapError (Maybe MatchArray)
forall a b. a -> Either a b
Left WrapError
err


-- ---------------------------------------------------------------------
-- | Matches a regular expression against a string
regexec :: Regex      -- ^ Compiled regular expression
        -> T.Text     -- ^ Text to match against
        -> IO (Either WrapError (Maybe (T.Text, T.Text, T.Text, [T.Text])))
                -- ^ Returns: 'Nothing' if the regex did not match the
                -- string, or 'Just' text including before and after text
regexec :: Regex
-> Text -> IO (Either WrapError (Maybe (Text, Text, Text, [Text])))
regexec Regex
re Text
tx = do
  Either WrapError (Maybe [(MatchOffset, MatchOffset)])
mb <- Text
-> (CStringLen
    -> IO (Either WrapError (Maybe [(MatchOffset, MatchOffset)])))
-> IO (Either WrapError (Maybe [(MatchOffset, MatchOffset)]))
forall a. Text -> (CStringLen -> IO a) -> IO a
asCStringLen Text
tx ((CStringLen
  -> IO (Either WrapError (Maybe [(MatchOffset, MatchOffset)])))
 -> IO (Either WrapError (Maybe [(MatchOffset, MatchOffset)])))
-> (CStringLen
    -> IO (Either WrapError (Maybe [(MatchOffset, MatchOffset)])))
-> IO (Either WrapError (Maybe [(MatchOffset, MatchOffset)]))
forall a b. (a -> b) -> a -> b
$ MatchOffset
-> Regex
-> CStringLen
-> IO (Either WrapError (Maybe [(MatchOffset, MatchOffset)]))
wrapMatch MatchOffset
0 Regex
re
  case Either WrapError (Maybe [(MatchOffset, MatchOffset)])
mb of
    Right  Maybe [(MatchOffset, MatchOffset)]
Nothing     -> Either WrapError (Maybe (Text, Text, Text, [Text]))
-> IO (Either WrapError (Maybe (Text, Text, Text, [Text])))
forall (m :: * -> *) a. Monad m => a -> m a
return (Maybe (Text, Text, Text, [Text])
-> Either WrapError (Maybe (Text, Text, Text, [Text]))
forall a b. b -> Either a b
Right Maybe (Text, Text, Text, [Text])
forall a. Maybe a
Nothing)
    Right (Just [(MatchOffset, MatchOffset)]
parts) -> Either WrapError (Maybe (Text, Text, Text, [Text]))
-> IO (Either WrapError (Maybe (Text, Text, Text, [Text])))
forall (m :: * -> *) a. Monad m => a -> m a
return (Either WrapError (Maybe (Text, Text, Text, [Text]))
 -> IO (Either WrapError (Maybe (Text, Text, Text, [Text]))))
-> ([(MatchOffset, MatchOffset)]
    -> Either WrapError (Maybe (Text, Text, Text, [Text])))
-> [(MatchOffset, MatchOffset)]
-> IO (Either WrapError (Maybe (Text, Text, Text, [Text])))
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Maybe (Text, Text, Text, [Text])
-> Either WrapError (Maybe (Text, Text, Text, [Text]))
forall a b. b -> Either a b
Right (Maybe (Text, Text, Text, [Text])
 -> Either WrapError (Maybe (Text, Text, Text, [Text])))
-> ([(MatchOffset, MatchOffset)]
    -> Maybe (Text, Text, Text, [Text]))
-> [(MatchOffset, MatchOffset)]
-> Either WrapError (Maybe (Text, Text, Text, [Text]))
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Text, Text, Text, [Text]) -> Maybe (Text, Text, Text, [Text])
forall a. a -> Maybe a
Just ((Text, Text, Text, [Text]) -> Maybe (Text, Text, Text, [Text]))
-> ([(MatchOffset, MatchOffset)] -> (Text, Text, Text, [Text]))
-> [(MatchOffset, MatchOffset)]
-> Maybe (Text, Text, Text, [Text])
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [(MatchOffset, MatchOffset)] -> (Text, Text, Text, [Text])
matchedParts ([(MatchOffset, MatchOffset)]
 -> IO (Either WrapError (Maybe (Text, Text, Text, [Text]))))
-> [(MatchOffset, MatchOffset)]
-> IO (Either WrapError (Maybe (Text, Text, Text, [Text])))
forall a b. (a -> b) -> a -> b
$ [(MatchOffset, MatchOffset)]
parts
    Left WrapError
err           -> Either WrapError (Maybe (Text, Text, Text, [Text]))
-> IO (Either WrapError (Maybe (Text, Text, Text, [Text])))
forall (m :: * -> *) a. Monad m => a -> m a
return (WrapError -> Either WrapError (Maybe (Text, Text, Text, [Text]))
forall a b. a -> Either a b
Left WrapError
err)
  where
    matchedParts :: [(MatchOffset, MatchOffset)] -> (Text, Text, Text, [Text])
matchedParts [] = (Text
T.empty,Text
T.empty,Text
tx,[]) -- no information
    matchedParts (mtchd :: (MatchOffset, MatchOffset)
mtchd@(MatchOffset
start,MatchOffset
stop):[(MatchOffset, MatchOffset)]
rst) =
        ( MatchOffset -> Text -> Text
T.take MatchOffset
start Text
tx
        , (MatchOffset, MatchOffset) -> Text
getSub (MatchOffset, MatchOffset)
mtchd
        , MatchOffset -> Text -> Text
T.drop MatchOffset
stop Text
tx
        , ((MatchOffset, MatchOffset) -> Text)
-> [(MatchOffset, MatchOffset)] -> [Text]
forall a b. (a -> b) -> [a] -> [b]
map (MatchOffset, MatchOffset) -> Text
getSub [(MatchOffset, MatchOffset)]
rst
        )

    getSub :: (MatchOffset, MatchOffset) -> Text
getSub (MatchOffset
start,MatchOffset
stop)
        | MatchOffset
start MatchOffset -> MatchOffset -> Bool
forall a. Eq a => a -> a -> Bool
== MatchOffset
unusedOffset = Text
T.empty
        | Bool
otherwise             = MatchOffset -> Text -> Text
T.take (MatchOffset
stopMatchOffset -> MatchOffset -> MatchOffset
forall a. Num a => a -> a -> a
-MatchOffset
start) (Text -> Text) -> (Text -> Text) -> Text -> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. MatchOffset -> Text -> Text
T.drop MatchOffset
start (Text -> Text) -> Text -> Text
forall a b. (a -> b) -> a -> b
$ Text
tx


-- ---------------------------------------------------------------------
-- helpers

unwrap :: (Show e) => Either e v -> IO v
unwrap :: Either e v -> IO v
unwrap Either e v
x = case Either e v
x of
  Left  e
e -> String -> IO v
forall (m :: * -> *) a. MonadFail m => String -> m a
fail (String -> IO v) -> String -> IO v
forall a b. (a -> b) -> a -> b
$ String
"Text.Regex.PCRE.Text died: " String -> String -> String
forall a. [a] -> [a] -> [a]
++ e -> String
forall a. Show a => a -> String
show e
e
  Right v
v -> v -> IO v
forall (m :: * -> *) a. Monad m => a -> m a
return v
v

{-# INLINE asCString #-}
asCString :: T.Text -> (CString->IO a) -> IO a
asCString :: Text -> (CString -> IO a) -> IO a
asCString Text
t
  | Text -> Bool
T.null Text
t Bool -> Bool -> Bool
|| (Char -> MatchOffset
ord (Text -> Char
T.last Text
t) MatchOffset -> MatchOffset -> Bool
forall a. Eq a => a -> a -> Bool
/= MatchOffset
0) = ByteString -> (CString -> IO a) -> IO a
forall a. ByteString -> (CString -> IO a) -> IO a
B.useAsCString (ByteString -> (CString -> IO a) -> IO a)
-> ByteString -> (CString -> IO a) -> IO a
forall a b. (a -> b) -> a -> b
$ Text -> ByteString
T.encodeUtf8 Text
t
  | Bool
otherwise = ByteString -> (CString -> IO a) -> IO a
forall a. ByteString -> (CString -> IO a) -> IO a
B.unsafeUseAsCString (ByteString -> (CString -> IO a) -> IO a)
-> ByteString -> (CString -> IO a) -> IO a
forall a b. (a -> b) -> a -> b
$ Text -> ByteString
T.encodeUtf8 Text
t

{-# INLINE asCStringLen #-}
asCStringLen :: T.Text -> (CStringLen->IO a) -> IO a
asCStringLen :: Text -> (CStringLen -> IO a) -> IO a
asCStringLen Text
s CStringLen -> IO a
op     = ByteString -> (CStringLen -> IO a) -> IO a
forall a. ByteString -> (CStringLen -> IO a) -> IO a
B.unsafeUseAsCStringLen (Text -> ByteString
T.encodeUtf8 Text
s) CStringLen -> IO a
checked
  where
    checked :: CStringLen -> IO a
checked cs :: CStringLen
cs@(CString
ptr,MatchOffset
_)
      | CString
ptr CString -> CString -> Bool
forall a. Eq a => a -> a -> Bool
== CString
forall a. Ptr a
nullPtr = ByteString -> (CStringLen -> IO a) -> IO a
forall a. ByteString -> (CStringLen -> IO a) -> IO a
B.unsafeUseAsCStringLen ByteString
myEmpty ((CStringLen -> IO a) -> IO a) -> (CStringLen -> IO a) -> IO a
forall a b. (a -> b) -> a -> b
$ CStringLen -> IO a
op (CStringLen -> IO a)
-> (CStringLen -> CStringLen) -> CStringLen -> IO a
forall b c a. (b -> c) -> (a -> b) -> a -> c
. CStringLen -> CStringLen
forall b a b. Num b => (a, b) -> (a, b)
trim
      | Bool
otherwise      = CStringLen -> IO a
op CStringLen
cs

    trim :: (a, b) -> (a, b)
trim (a
ptr,b
_) = (a
ptr,b
0)

myEmpty :: B.ByteString
myEmpty :: ByteString
myEmpty = [Word8] -> ByteString
B.pack [Word8
0]