{-# OPTIONS_GHC -fno-warn-orphans #-}
{-|
This exports instances of the high level API and the medium level
API of 'compile','execute', and 'regexec'.
-}
{- Copyright   :  (c) Chris Kuklewicz 2007 -}
module Text.Regex.PCRE.ByteString(
  -- ** Types
  Regex,
  MatchOffset,
  MatchLength,
  CompOption(CompOption),
  ExecOption(ExecOption),
  ReturnCode,
  WrapError,
  -- ** Miscellaneous
  unusedOffset,
  getVersion,
  -- ** Medium level API functions
  compile,
  execute,
  regexec,
  -- ** CompOption flags
  compBlank,
  compAnchored,
  compAutoCallout,
  compCaseless,
  compDollarEndOnly,
  compDotAll,
  compExtended,
  compExtra,
  compFirstLine,
  compMultiline,
  compNoAutoCapture,
  compUngreedy,
  compUTF8,
  compNoUTF8Check,
  -- ** ExecOption flags
  execBlank,
  execAnchored,
  execNotBOL,
  execNotEOL,
  execNotEmpty,
  execNoUTF8Check,
  execPartial
  ) where

import Prelude hiding (fail)
import Control.Monad.Fail (MonadFail(fail))

import Text.Regex.PCRE.Wrap -- all
import Data.Array(Array,listArray)
import Data.ByteString(ByteString)
import qualified Data.ByteString as B(empty,useAsCString,last,take,drop,null,pack)
import qualified Data.ByteString.Unsafe as B(unsafeUseAsCString,unsafeUseAsCStringLen)
import System.IO.Unsafe(unsafePerformIO)
import Text.Regex.Base.RegexLike(RegexContext(..),RegexMaker(..),RegexLike(..),MatchOffset,MatchLength)
import Text.Regex.Base.Impl(polymatch,polymatchM)
import Foreign.C.String(CStringLen)
import Foreign(nullPtr)

instance RegexContext Regex ByteString ByteString where
  match :: Regex -> ByteString -> ByteString
match = Regex -> ByteString -> ByteString
forall a b. RegexLike a b => a -> b -> b
polymatch
  matchM :: Regex -> ByteString -> m ByteString
matchM = Regex -> ByteString -> m ByteString
forall a b (m :: * -> *).
(RegexLike a b, MonadFail m) =>
a -> b -> m b
polymatchM

unwrap :: (Show e) => Either e v -> IO v
unwrap :: Either e v -> IO v
unwrap Either e v
x = case Either e v
x of Left e
err -> String -> IO v
forall (m :: * -> *) a. MonadFail m => String -> m a
fail (String
"Text.Regex.PCRE.ByteString died: "String -> String -> String
forall a. [a] -> [a] -> [a]
++ e -> String
forall a. Show a => a -> String
show e
err)
                     Right v
v -> v -> IO v
forall (m :: * -> *) a. Monad m => a -> m a
return v
v

{-# INLINE asCStringLen #-}
asCStringLen :: ByteString -> (CStringLen -> IO a) -> IO a
asCStringLen :: ByteString -> (CStringLen -> IO a) -> IO a
asCStringLen ByteString
s CStringLen -> IO a
op = ByteString -> (CStringLen -> IO a) -> IO a
forall a. ByteString -> (CStringLen -> IO a) -> IO a
B.unsafeUseAsCStringLen ByteString
s CStringLen -> IO a
checked
  where checked :: CStringLen -> IO a
checked cs :: CStringLen
cs@(Ptr CChar
ptr,Int
_) | Ptr CChar
ptr Ptr CChar -> Ptr CChar -> Bool
forall a. Eq a => a -> a -> Bool
== Ptr CChar
forall a. Ptr a
nullPtr = ByteString -> (CStringLen -> IO a) -> IO a
forall a. ByteString -> (CStringLen -> IO a) -> IO a
B.unsafeUseAsCStringLen ByteString
myEmpty (CStringLen -> IO a
op (CStringLen -> IO a)
-> (CStringLen -> CStringLen) -> CStringLen -> IO a
forall b c a. (b -> c) -> (a -> b) -> a -> c
. CStringLen -> CStringLen
forall b a b. Num b => (a, b) -> (a, b)
trim)
                           | Bool
otherwise = CStringLen -> IO a
op CStringLen
cs
        myEmpty :: ByteString
myEmpty = [Word8] -> ByteString
B.pack [Word8
0]
        trim :: (a, b) -> (a, b)
trim (a
ptr,b
_) = (a
ptr,b
0)

instance RegexMaker Regex CompOption ExecOption ByteString where
  makeRegexOpts :: CompOption -> ExecOption -> ByteString -> Regex
makeRegexOpts CompOption
c ExecOption
e ByteString
pattern = IO Regex -> Regex
forall a. IO a -> a
unsafePerformIO (IO Regex -> Regex) -> IO Regex -> Regex
forall a b. (a -> b) -> a -> b
$
    CompOption
-> ExecOption -> ByteString -> IO (Either (Int, String) Regex)
compile CompOption
c ExecOption
e ByteString
pattern IO (Either (Int, String) Regex)
-> (Either (Int, String) Regex -> IO Regex) -> IO Regex
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= Either (Int, String) Regex -> IO Regex
forall e v. Show e => Either e v -> IO v
unwrap
  makeRegexOptsM :: CompOption -> ExecOption -> ByteString -> m Regex
makeRegexOptsM CompOption
c ExecOption
e ByteString
pattern = ((Int, String) -> m Regex)
-> (Regex -> m Regex) -> Either (Int, String) Regex -> m Regex
forall a c b. (a -> c) -> (b -> c) -> Either a b -> c
either (String -> m Regex
forall (m :: * -> *) a. MonadFail m => String -> m a
fail(String -> m Regex)
-> ((Int, String) -> String) -> (Int, String) -> m Regex
forall b c a. (b -> c) -> (a -> b) -> a -> c
.(Int, String) -> String
forall a. Show a => a -> String
show) Regex -> m Regex
forall (m :: * -> *) a. Monad m => a -> m a
return (Either (Int, String) Regex -> m Regex)
-> Either (Int, String) Regex -> m Regex
forall a b. (a -> b) -> a -> b
$ IO (Either (Int, String) Regex) -> Either (Int, String) Regex
forall a. IO a -> a
unsafePerformIO (IO (Either (Int, String) Regex) -> Either (Int, String) Regex)
-> IO (Either (Int, String) Regex) -> Either (Int, String) Regex
forall a b. (a -> b) -> a -> b
$
    CompOption
-> ExecOption -> ByteString -> IO (Either (Int, String) Regex)
compile CompOption
c ExecOption
e ByteString
pattern

instance RegexLike Regex ByteString where
  matchTest :: Regex -> ByteString -> Bool
matchTest Regex
regex ByteString
bs = IO Bool -> Bool
forall a. IO a -> a
unsafePerformIO (IO Bool -> Bool) -> IO Bool -> Bool
forall a b. (a -> b) -> a -> b
$
    ByteString
-> (CStringLen -> IO (Either WrapError Bool))
-> IO (Either WrapError Bool)
forall a. ByteString -> (CStringLen -> IO a) -> IO a
asCStringLen ByteString
bs (Int -> Regex -> CStringLen -> IO (Either WrapError Bool)
wrapTest Int
0 Regex
regex) IO (Either WrapError Bool)
-> (Either WrapError Bool -> IO Bool) -> IO Bool
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= Either WrapError Bool -> IO Bool
forall e v. Show e => Either e v -> IO v
unwrap
  matchOnce :: Regex -> ByteString -> Maybe MatchArray
matchOnce Regex
regex ByteString
bs = IO (Maybe MatchArray) -> Maybe MatchArray
forall a. IO a -> a
unsafePerformIO (IO (Maybe MatchArray) -> Maybe MatchArray)
-> IO (Maybe MatchArray) -> Maybe MatchArray
forall a b. (a -> b) -> a -> b
$
    Regex -> ByteString -> IO (Either WrapError (Maybe MatchArray))
execute Regex
regex ByteString
bs IO (Either WrapError (Maybe MatchArray))
-> (Either WrapError (Maybe MatchArray) -> IO (Maybe MatchArray))
-> IO (Maybe MatchArray)
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= Either WrapError (Maybe MatchArray) -> IO (Maybe MatchArray)
forall e v. Show e => Either e v -> IO v
unwrap
  matchAll :: Regex -> ByteString -> [MatchArray]
matchAll Regex
regex ByteString
bs = IO [MatchArray] -> [MatchArray]
forall a. IO a -> a
unsafePerformIO (IO [MatchArray] -> [MatchArray])
-> IO [MatchArray] -> [MatchArray]
forall a b. (a -> b) -> a -> b
$
    ByteString
-> (CStringLen -> IO (Either WrapError [MatchArray]))
-> IO (Either WrapError [MatchArray])
forall a. ByteString -> (CStringLen -> IO a) -> IO a
asCStringLen ByteString
bs (Regex -> CStringLen -> IO (Either WrapError [MatchArray])
wrapMatchAll Regex
regex) IO (Either WrapError [MatchArray])
-> (Either WrapError [MatchArray] -> IO [MatchArray])
-> IO [MatchArray]
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= Either WrapError [MatchArray] -> IO [MatchArray]
forall e v. Show e => Either e v -> IO v
unwrap
  matchCount :: Regex -> ByteString -> Int
matchCount Regex
regex ByteString
bs = IO Int -> Int
forall a. IO a -> a
unsafePerformIO (IO Int -> Int) -> IO Int -> Int
forall a b. (a -> b) -> a -> b
$
    ByteString
-> (CStringLen -> IO (Either WrapError Int))
-> IO (Either WrapError Int)
forall a. ByteString -> (CStringLen -> IO a) -> IO a
asCStringLen ByteString
bs (Regex -> CStringLen -> IO (Either WrapError Int)
wrapCount Regex
regex) IO (Either WrapError Int)
-> (Either WrapError Int -> IO Int) -> IO Int
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= Either WrapError Int -> IO Int
forall e v. Show e => Either e v -> IO v
unwrap

-- ---------------------------------------------------------------------
-- | Compiles a regular expression
--
compile :: CompOption  -- ^ (summed together)
        -> ExecOption  -- ^ (summed together)
        -> ByteString  -- ^ The regular expression to compile
        -> IO (Either (MatchOffset,String) Regex) -- ^ Returns: the compiled regular expression
compile :: CompOption
-> ExecOption -> ByteString -> IO (Either (Int, String) Regex)
compile CompOption
c ExecOption
e ByteString
pattern = do
  -- PCRE does not allow one to specify a length for the regular expression, it must by 0 terminated
  let asCString :: ByteString -> (Ptr CChar -> IO a) -> IO a
asCString ByteString
bs = if (Bool -> Bool
not (ByteString -> Bool
B.null ByteString
bs)) Bool -> Bool -> Bool
&& (Word8
0Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
==ByteString -> Word8
B.last ByteString
bs)
                       then ByteString -> (Ptr CChar -> IO a) -> IO a
forall a. ByteString -> (Ptr CChar -> IO a) -> IO a
B.unsafeUseAsCString ByteString
bs
                       else ByteString -> (Ptr CChar -> IO a) -> IO a
forall a. ByteString -> (Ptr CChar -> IO a) -> IO a
B.useAsCString ByteString
bs
  ByteString
-> (Ptr CChar -> IO (Either (Int, String) Regex))
-> IO (Either (Int, String) Regex)
forall a. ByteString -> (Ptr CChar -> IO a) -> IO a
asCString ByteString
pattern (CompOption
-> ExecOption -> Ptr CChar -> IO (Either (Int, String) Regex)
wrapCompile CompOption
c ExecOption
e)

-- ---------------------------------------------------------------------
-- | Matches a regular expression against a buffer, returning the buffer
-- indicies of the match, and any submatches
--
-- | Matches a regular expression against a string
execute :: Regex      -- ^ Compiled regular expression
        -> ByteString -- ^ String to match against
        -> IO (Either WrapError (Maybe (Array Int (MatchOffset,MatchLength))))
                -- ^ Returns: 'Nothing' if the regex did not match the
                -- string, or:
                --   'Just' an array of (offset,length) pairs where index 0 is whole match, and the rest are the captured subexpressions.
execute :: Regex -> ByteString -> IO (Either WrapError (Maybe MatchArray))
execute Regex
regex ByteString
bs = do
  Either WrapError (Maybe [(Int, Int)])
maybeStartEnd <- ByteString
-> (CStringLen -> IO (Either WrapError (Maybe [(Int, Int)])))
-> IO (Either WrapError (Maybe [(Int, Int)]))
forall a. ByteString -> (CStringLen -> IO a) -> IO a
asCStringLen ByteString
bs (Int
-> Regex
-> CStringLen
-> IO (Either WrapError (Maybe [(Int, Int)]))
wrapMatch Int
0 Regex
regex)
  case Either WrapError (Maybe [(Int, Int)])
maybeStartEnd of
    Right Maybe [(Int, Int)]
Nothing -> Either WrapError (Maybe MatchArray)
-> IO (Either WrapError (Maybe MatchArray))
forall (m :: * -> *) a. Monad m => a -> m a
return (Maybe MatchArray -> Either WrapError (Maybe MatchArray)
forall a b. b -> Either a b
Right Maybe MatchArray
forall a. Maybe a
Nothing)
    Right (Just [(Int, Int)]
parts) ->
      Either WrapError (Maybe MatchArray)
-> IO (Either WrapError (Maybe MatchArray))
forall (m :: * -> *) a. Monad m => a -> m a
return (Either WrapError (Maybe MatchArray)
 -> IO (Either WrapError (Maybe MatchArray)))
-> ([(Int, Int)] -> Either WrapError (Maybe MatchArray))
-> [(Int, Int)]
-> IO (Either WrapError (Maybe MatchArray))
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Maybe MatchArray -> Either WrapError (Maybe MatchArray)
forall a b. b -> Either a b
Right (Maybe MatchArray -> Either WrapError (Maybe MatchArray))
-> ([(Int, Int)] -> Maybe MatchArray)
-> [(Int, Int)]
-> Either WrapError (Maybe MatchArray)
forall b c a. (b -> c) -> (a -> b) -> a -> c
. MatchArray -> Maybe MatchArray
forall a. a -> Maybe a
Just (MatchArray -> Maybe MatchArray)
-> ([(Int, Int)] -> MatchArray) -> [(Int, Int)] -> Maybe MatchArray
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Int, Int) -> [(Int, Int)] -> MatchArray
forall i e. Ix i => (i, i) -> [e] -> Array i e
listArray (Int
0,Int -> Int
forall a. Enum a => a -> a
pred ([(Int, Int)] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length [(Int, Int)]
parts))
      ([(Int, Int)] -> MatchArray)
-> ([(Int, Int)] -> [(Int, Int)]) -> [(Int, Int)] -> MatchArray
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ((Int, Int) -> (Int, Int)) -> [(Int, Int)] -> [(Int, Int)]
forall a b. (a -> b) -> [a] -> [b]
map (\(Int
s,Int
e)->(Int -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
s, Int -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Int
eInt -> Int -> Int
forall a. Num a => a -> a -> a
-Int
s))) ([(Int, Int)] -> IO (Either WrapError (Maybe MatchArray)))
-> [(Int, Int)] -> IO (Either WrapError (Maybe MatchArray))
forall a b. (a -> b) -> a -> b
$ [(Int, Int)]
parts
    Left WrapError
err -> Either WrapError (Maybe MatchArray)
-> IO (Either WrapError (Maybe MatchArray))
forall (m :: * -> *) a. Monad m => a -> m a
return (WrapError -> Either WrapError (Maybe MatchArray)
forall a b. a -> Either a b
Left WrapError
err)

regexec :: Regex      -- ^ Compiled regular expression
        -> ByteString -- ^ String to match against
        -> IO (Either WrapError (Maybe (ByteString, ByteString, ByteString, [ByteString])))
regexec :: Regex
-> ByteString
-> IO
     (Either
        WrapError
        (Maybe (ByteString, ByteString, ByteString, [ByteString])))
regexec Regex
regex ByteString
bs = do
  let getSub :: (Int, Int) -> ByteString
getSub (Int
start,Int
stop) | Int
start Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
unusedOffset = ByteString
B.empty
                          | Bool
otherwise = Int -> ByteString -> ByteString
B.take (Int
stopInt -> Int -> Int
forall a. Num a => a -> a -> a
-Int
start) (ByteString -> ByteString)
-> (ByteString -> ByteString) -> ByteString -> ByteString
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Int -> ByteString -> ByteString
B.drop Int
start (ByteString -> ByteString) -> ByteString -> ByteString
forall a b. (a -> b) -> a -> b
$ ByteString
bs
      matchedParts :: [(Int, Int)] -> (ByteString, ByteString, ByteString, [ByteString])
matchedParts [] = (ByteString
B.empty,ByteString
B.empty,ByteString
bs,[]) -- no information
      matchedParts (matchedStartStop :: (Int, Int)
matchedStartStop@(Int
start,Int
stop):[(Int, Int)]
subStartStop) =
        (Int -> ByteString -> ByteString
B.take Int
start ByteString
bs
        ,(Int, Int) -> ByteString
getSub (Int, Int)
matchedStartStop
        ,Int -> ByteString -> ByteString
B.drop Int
stop ByteString
bs
        ,((Int, Int) -> ByteString) -> [(Int, Int)] -> [ByteString]
forall a b. (a -> b) -> [a] -> [b]
map (Int, Int) -> ByteString
getSub [(Int, Int)]
subStartStop)
  Either WrapError (Maybe [(Int, Int)])
maybeStartEnd <- ByteString
-> (CStringLen -> IO (Either WrapError (Maybe [(Int, Int)])))
-> IO (Either WrapError (Maybe [(Int, Int)]))
forall a. ByteString -> (CStringLen -> IO a) -> IO a
asCStringLen ByteString
bs (Int
-> Regex
-> CStringLen
-> IO (Either WrapError (Maybe [(Int, Int)]))
wrapMatch Int
0 Regex
regex)
  case Either WrapError (Maybe [(Int, Int)])
maybeStartEnd of
    Right Maybe [(Int, Int)]
Nothing -> Either
  WrapError
  (Maybe (ByteString, ByteString, ByteString, [ByteString]))
-> IO
     (Either
        WrapError
        (Maybe (ByteString, ByteString, ByteString, [ByteString])))
forall (m :: * -> *) a. Monad m => a -> m a
return (Maybe (ByteString, ByteString, ByteString, [ByteString])
-> Either
     WrapError
     (Maybe (ByteString, ByteString, ByteString, [ByteString]))
forall a b. b -> Either a b
Right Maybe (ByteString, ByteString, ByteString, [ByteString])
forall a. Maybe a
Nothing)
    Right (Just [(Int, Int)]
parts) -> Either
  WrapError
  (Maybe (ByteString, ByteString, ByteString, [ByteString]))
-> IO
     (Either
        WrapError
        (Maybe (ByteString, ByteString, ByteString, [ByteString])))
forall (m :: * -> *) a. Monad m => a -> m a
return (Either
   WrapError
   (Maybe (ByteString, ByteString, ByteString, [ByteString]))
 -> IO
      (Either
         WrapError
         (Maybe (ByteString, ByteString, ByteString, [ByteString]))))
-> ([(Int, Int)]
    -> Either
         WrapError
         (Maybe (ByteString, ByteString, ByteString, [ByteString])))
-> [(Int, Int)]
-> IO
     (Either
        WrapError
        (Maybe (ByteString, ByteString, ByteString, [ByteString])))
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Maybe (ByteString, ByteString, ByteString, [ByteString])
-> Either
     WrapError
     (Maybe (ByteString, ByteString, ByteString, [ByteString]))
forall a b. b -> Either a b
Right (Maybe (ByteString, ByteString, ByteString, [ByteString])
 -> Either
      WrapError
      (Maybe (ByteString, ByteString, ByteString, [ByteString])))
-> ([(Int, Int)]
    -> Maybe (ByteString, ByteString, ByteString, [ByteString]))
-> [(Int, Int)]
-> Either
     WrapError
     (Maybe (ByteString, ByteString, ByteString, [ByteString]))
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (ByteString, ByteString, ByteString, [ByteString])
-> Maybe (ByteString, ByteString, ByteString, [ByteString])
forall a. a -> Maybe a
Just ((ByteString, ByteString, ByteString, [ByteString])
 -> Maybe (ByteString, ByteString, ByteString, [ByteString]))
-> ([(Int, Int)]
    -> (ByteString, ByteString, ByteString, [ByteString]))
-> [(Int, Int)]
-> Maybe (ByteString, ByteString, ByteString, [ByteString])
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [(Int, Int)] -> (ByteString, ByteString, ByteString, [ByteString])
matchedParts ([(Int, Int)]
 -> IO
      (Either
         WrapError
         (Maybe (ByteString, ByteString, ByteString, [ByteString]))))
-> [(Int, Int)]
-> IO
     (Either
        WrapError
        (Maybe (ByteString, ByteString, ByteString, [ByteString])))
forall a b. (a -> b) -> a -> b
$ [(Int, Int)]
parts
    Left WrapError
err -> Either
  WrapError
  (Maybe (ByteString, ByteString, ByteString, [ByteString]))
-> IO
     (Either
        WrapError
        (Maybe (ByteString, ByteString, ByteString, [ByteString])))
forall (m :: * -> *) a. Monad m => a -> m a
return (WrapError
-> Either
     WrapError
     (Maybe (ByteString, ByteString, ByteString, [ByteString]))
forall a b. a -> Either a b
Left WrapError
err)