{-# OPTIONS_GHC -fno-warn-orphans #-}
-----------------------------------------------------------------------------
-- |
-- Module      :  Text.Regex.Posix.ByteString.Lazy
-- Copyright   :  (c) Chris Kuklewicz 2007
-- License     :  BSD-3-Clause
--
-- Maintainer  :  hvr@gnu.org, Andreas Abel
-- Stability   :  stable
-- Portability :  non-portable (regex-base needs MPTC+FD)
--
-- This provides 'ByteString.Lazy' instances for RegexMaker and RegexLike
-- based on "Text.Regex.Posix.Wrap", and a (RegexContext Regex
-- ByteString ByteString) instance.
--
-- To use these instance, you would normally import
-- "Text.Regex.Posix".  You only need to import this module to use
-- the medium level API of the compile, regexec, and execute
-- functions.  All of these report error by returning Left values
-- instead of undefined or error or fail.
--
-- A Lazy ByteString with more than one chunk cannot be be passed to
-- the library efficiently (as a pointer).  It will have to converted
-- via a full copy to a temporary normal bytestring (with a null byte
-- appended if necessary).
-----------------------------------------------------------------------------

module Text.Regex.Posix.ByteString.Lazy(
  -- ** Types
  Regex,
  MatchOffset,
  MatchLength,
  ReturnCode,
  WrapError,
  -- ** Miscellaneous
  unusedOffset,
  -- ** Medium level API functions
  compile,
  execute,
  regexec,
  -- ** Compilation options
  CompOption(CompOption),
  compBlank,
  compExtended,   -- use extended regex syntax
  compIgnoreCase, -- ignore case when matching
  compNoSub,      -- no substring matching needed
  compNewline,    -- '.' doesn't match newline
  -- ** Execution options
  ExecOption(ExecOption),
  execBlank,
  execNotBOL,      -- not at begining of line
  execNotEOL       -- not at end of line
  ) where

import Prelude hiding (fail)
import Control.Monad.Fail (MonadFail(fail))

import Data.Array(Array)
import qualified Data.ByteString.Lazy as L (ByteString,null,toChunks,fromChunks,last,snoc)
import qualified Data.ByteString as B(ByteString,concat)
import qualified Data.ByteString.Unsafe as B(unsafeUseAsCString)
import System.IO.Unsafe(unsafePerformIO)
import Text.Regex.Base.RegexLike(RegexMaker(..),RegexContext(..),RegexLike(..),MatchOffset,MatchLength)
import Text.Regex.Posix.Wrap -- all
import qualified Text.Regex.Posix.ByteString as BS(execute,regexec)
import Text.Regex.Base.Impl(polymatch,polymatchM)
import Foreign.C.String(CString)

instance RegexContext Regex L.ByteString L.ByteString where
  match :: Regex -> ByteString -> ByteString
match = Regex -> ByteString -> ByteString
forall a b. RegexLike a b => a -> b -> b
polymatch
  matchM :: forall (m :: * -> *).
MonadFail m =>
Regex -> ByteString -> m ByteString
matchM = Regex -> ByteString -> m ByteString
forall a b (m :: * -> *).
(RegexLike a b, MonadFail m) =>
a -> b -> m b
polymatchM

fromLazy :: L.ByteString -> B.ByteString
fromLazy :: ByteString -> ByteString
fromLazy = [ByteString] -> ByteString
B.concat ([ByteString] -> ByteString)
-> (ByteString -> [ByteString]) -> ByteString -> ByteString
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ByteString -> [ByteString]
L.toChunks

toLazy :: B.ByteString -> L.ByteString
toLazy :: ByteString -> ByteString
toLazy = [ByteString] -> ByteString
L.fromChunks ([ByteString] -> ByteString)
-> (ByteString -> [ByteString]) -> ByteString -> ByteString
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ByteString -> [ByteString]
forall (m :: * -> *) a. Monad m => a -> m a
return

unwrap :: (Show e) => Either e v -> IO v
unwrap :: forall e v. Show e => Either e v -> IO v
unwrap Either e v
x = case Either e v
x of Left e
err -> String -> IO v
forall (m :: * -> *) a. MonadFail m => String -> m a
fail (String
"Text.Regex.Posix.ByteString.Lazy died: "String -> String -> String
forall a. [a] -> [a] -> [a]
++ e -> String
forall a. Show a => a -> String
show e
err)
                     Right v
v -> v -> IO v
forall (m :: * -> *) a. Monad m => a -> m a
return v
v

{-# INLINE asCString #-}
asCString :: L.ByteString -> (CString -> IO a) -> IO a
asCString :: forall a. ByteString -> (CString -> IO a) -> IO a
asCString ByteString
s = if (Bool -> Bool
not (ByteString -> Bool
L.null ByteString
s)) Bool -> Bool -> Bool
&& (Word8
0Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
==ByteString -> Word8
L.last ByteString
s)
                then ByteString -> (CString -> IO a) -> IO a
forall a. ByteString -> (CString -> IO a) -> IO a
B.unsafeUseAsCString (ByteString -> ByteString
fromLazy ByteString
s)
                else ByteString -> (CString -> IO a) -> IO a
forall a. ByteString -> (CString -> IO a) -> IO a
B.unsafeUseAsCString (ByteString -> ByteString
fromLazy (ByteString -> Word8 -> ByteString
L.snoc ByteString
s Word8
0))

instance RegexMaker Regex CompOption ExecOption L.ByteString where
  makeRegexOpts :: CompOption -> ExecOption -> ByteString -> Regex
makeRegexOpts CompOption
c ExecOption
e ByteString
pattern = IO Regex -> Regex
forall a. IO a -> a
unsafePerformIO (IO Regex -> Regex) -> IO Regex -> Regex
forall a b. (a -> b) -> a -> b
$ CompOption
-> ExecOption -> ByteString -> IO (Either WrapError Regex)
compile CompOption
c ExecOption
e ByteString
pattern IO (Either WrapError Regex)
-> (Either WrapError Regex -> IO Regex) -> IO Regex
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= Either WrapError Regex -> IO Regex
forall e v. Show e => Either e v -> IO v
unwrap
  makeRegexOptsM :: forall (m :: * -> *).
MonadFail m =>
CompOption -> ExecOption -> ByteString -> m Regex
makeRegexOptsM CompOption
c ExecOption
e ByteString
pattern = (WrapError -> m Regex)
-> (Regex -> m Regex) -> Either WrapError Regex -> m Regex
forall a c b. (a -> c) -> (b -> c) -> Either a b -> c
either (String -> m Regex
forall (m :: * -> *) a. MonadFail m => String -> m a
fail(String -> m Regex)
-> (WrapError -> String) -> WrapError -> m Regex
forall b c a. (b -> c) -> (a -> b) -> a -> c
.WrapError -> String
forall a. Show a => a -> String
show) Regex -> m Regex
forall (m :: * -> *) a. Monad m => a -> m a
return (Either WrapError Regex -> m Regex)
-> Either WrapError Regex -> m Regex
forall a b. (a -> b) -> a -> b
$ IO (Either WrapError Regex) -> Either WrapError Regex
forall a. IO a -> a
unsafePerformIO (IO (Either WrapError Regex) -> Either WrapError Regex)
-> IO (Either WrapError Regex) -> Either WrapError Regex
forall a b. (a -> b) -> a -> b
$ CompOption
-> ExecOption -> ByteString -> IO (Either WrapError Regex)
compile CompOption
c ExecOption
e ByteString
pattern

instance RegexLike Regex L.ByteString where
  matchTest :: Regex -> ByteString -> Bool
matchTest Regex
regex ByteString
bs = IO Bool -> Bool
forall a. IO a -> a
unsafePerformIO (IO Bool -> Bool) -> IO Bool -> Bool
forall a b. (a -> b) -> a -> b
$
    ByteString
-> (CString -> IO (Either WrapError Bool))
-> IO (Either WrapError Bool)
forall a. ByteString -> (CString -> IO a) -> IO a
asCString ByteString
bs (Regex -> CString -> IO (Either WrapError Bool)
wrapTest Regex
regex) IO (Either WrapError Bool)
-> (Either WrapError Bool -> IO Bool) -> IO Bool
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>=  Either WrapError Bool -> IO Bool
forall e v. Show e => Either e v -> IO v
unwrap
  matchOnce :: Regex -> ByteString -> Maybe MatchArray
matchOnce Regex
regex ByteString
bs = IO (Maybe MatchArray) -> Maybe MatchArray
forall a. IO a -> a
unsafePerformIO (IO (Maybe MatchArray) -> Maybe MatchArray)
-> IO (Maybe MatchArray) -> Maybe MatchArray
forall a b. (a -> b) -> a -> b
$
    Regex -> ByteString -> IO (Either WrapError (Maybe MatchArray))
execute Regex
regex ByteString
bs IO (Either WrapError (Maybe MatchArray))
-> (Either WrapError (Maybe MatchArray) -> IO (Maybe MatchArray))
-> IO (Maybe MatchArray)
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= Either WrapError (Maybe MatchArray) -> IO (Maybe MatchArray)
forall e v. Show e => Either e v -> IO v
unwrap
  matchAll :: Regex -> ByteString -> [MatchArray]
matchAll Regex
regex ByteString
bs = IO [MatchArray] -> [MatchArray]
forall a. IO a -> a
unsafePerformIO (IO [MatchArray] -> [MatchArray])
-> IO [MatchArray] -> [MatchArray]
forall a b. (a -> b) -> a -> b
$
    ByteString
-> (CString -> IO (Either WrapError [MatchArray]))
-> IO (Either WrapError [MatchArray])
forall a. ByteString -> (CString -> IO a) -> IO a
asCString ByteString
bs (Regex -> CString -> IO (Either WrapError [MatchArray])
wrapMatchAll Regex
regex) IO (Either WrapError [MatchArray])
-> (Either WrapError [MatchArray] -> IO [MatchArray])
-> IO [MatchArray]
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= Either WrapError [MatchArray] -> IO [MatchArray]
forall e v. Show e => Either e v -> IO v
unwrap
  matchCount :: Regex -> ByteString -> Int
matchCount Regex
regex ByteString
bs = IO Int -> Int
forall a. IO a -> a
unsafePerformIO (IO Int -> Int) -> IO Int -> Int
forall a b. (a -> b) -> a -> b
$
    ByteString
-> (CString -> IO (Either WrapError Int))
-> IO (Either WrapError Int)
forall a. ByteString -> (CString -> IO a) -> IO a
asCString ByteString
bs (Regex -> CString -> IO (Either WrapError Int)
wrapCount Regex
regex) IO (Either WrapError Int)
-> (Either WrapError Int -> IO Int) -> IO Int
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= Either WrapError Int -> IO Int
forall e v. Show e => Either e v -> IO v
unwrap

-- ---------------------------------------------------------------------
-- | Compiles a regular expression
--
compile :: CompOption    -- ^ Flags (summed together)
        -> ExecOption    -- ^ Flags (summed together)
        -> L.ByteString  -- ^ The regular expression to compile
        -> IO (Either WrapError Regex)      -- ^ Returns: the compiled regular expression
compile :: CompOption
-> ExecOption -> ByteString -> IO (Either WrapError Regex)
compile CompOption
c ExecOption
e ByteString
pattern = ByteString
-> (CString -> IO (Either WrapError Regex))
-> IO (Either WrapError Regex)
forall a. ByteString -> (CString -> IO a) -> IO a
asCString ByteString
pattern (CompOption -> ExecOption -> CString -> IO (Either WrapError Regex)
wrapCompile CompOption
c ExecOption
e)

-- ---------------------------------------------------------------------
-- | Matches a regular expression against a buffer, returning the buffer
-- indicies of the match, and any submatches
--
-- | Matches a regular expression against a string
execute :: Regex      -- ^ Compiled regular expression
        -> L.ByteString -- ^ String to match against
        -> IO (Either WrapError (Maybe (Array Int (MatchOffset,MatchLength))))
                -- ^ Returns: 'Nothing' if the regex did not match the
                -- string, or:
                --   'Just' an array of (offset,length) pairs where index 0 is whole match, and the rest are the captured subexpressions.
execute :: Regex -> ByteString -> IO (Either WrapError (Maybe MatchArray))
execute Regex
regex ByteString
bs = if (Bool -> Bool
not (ByteString -> Bool
L.null ByteString
bs)) Bool -> Bool -> Bool
&& (Word8
0Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
==ByteString -> Word8
L.last ByteString
bs)
                     then Regex -> ByteString -> IO (Either WrapError (Maybe MatchArray))
BS.execute Regex
regex (ByteString -> ByteString
fromLazy ByteString
bs)
                     else Regex -> ByteString -> IO (Either WrapError (Maybe MatchArray))
BS.execute Regex
regex (ByteString -> ByteString
fromLazy (ByteString -> Word8 -> ByteString
L.snoc ByteString
bs Word8
0))

regexec :: Regex      -- ^ Compiled regular expression
        -> L.ByteString -- ^ String to match against
        -> IO (Either WrapError (Maybe (L.ByteString, L.ByteString, L.ByteString, [L.ByteString])))
regexec :: Regex
-> ByteString
-> IO
     (Either
        WrapError
        (Maybe (ByteString, ByteString, ByteString, [ByteString])))
regexec Regex
regex ByteString
bs = do
  Either
  WrapError
  (Maybe (ByteString, ByteString, ByteString, [ByteString]))
x <- if (Bool -> Bool
not (ByteString -> Bool
L.null ByteString
bs)) Bool -> Bool -> Bool
&& (Word8
0Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
==ByteString -> Word8
L.last ByteString
bs)
         then Regex
-> ByteString
-> IO
     (Either
        WrapError
        (Maybe (ByteString, ByteString, ByteString, [ByteString])))
BS.regexec Regex
regex (ByteString -> ByteString
fromLazy ByteString
bs)
         else Regex
-> ByteString
-> IO
     (Either
        WrapError
        (Maybe (ByteString, ByteString, ByteString, [ByteString])))
BS.regexec Regex
regex (ByteString -> ByteString
fromLazy (ByteString -> Word8 -> ByteString
L.snoc ByteString
bs Word8
0))
  Either
  WrapError
  (Maybe (ByteString, ByteString, ByteString, [ByteString]))
-> IO
     (Either
        WrapError
        (Maybe (ByteString, ByteString, ByteString, [ByteString])))
forall (m :: * -> *) a. Monad m => a -> m a
return (Either
   WrapError
   (Maybe (ByteString, ByteString, ByteString, [ByteString]))
 -> IO
      (Either
         WrapError
         (Maybe (ByteString, ByteString, ByteString, [ByteString]))))
-> Either
     WrapError
     (Maybe (ByteString, ByteString, ByteString, [ByteString]))
-> IO
     (Either
        WrapError
        (Maybe (ByteString, ByteString, ByteString, [ByteString])))
forall a b. (a -> b) -> a -> b
$ case Either
  WrapError
  (Maybe (ByteString, ByteString, ByteString, [ByteString]))
x of
             Left WrapError
e -> WrapError
-> Either
     WrapError
     (Maybe (ByteString, ByteString, ByteString, [ByteString]))
forall a b. a -> Either a b
Left WrapError
e
             Right Maybe (ByteString, ByteString, ByteString, [ByteString])
Nothing -> Maybe (ByteString, ByteString, ByteString, [ByteString])
-> Either
     WrapError
     (Maybe (ByteString, ByteString, ByteString, [ByteString]))
forall a b. b -> Either a b
Right Maybe (ByteString, ByteString, ByteString, [ByteString])
forall a. Maybe a
Nothing
             Right (Just (ByteString
a,ByteString
b,ByteString
c,[ByteString]
ds)) -> Maybe (ByteString, ByteString, ByteString, [ByteString])
-> Either
     WrapError
     (Maybe (ByteString, ByteString, ByteString, [ByteString]))
forall a b. b -> Either a b
Right ((ByteString, ByteString, ByteString, [ByteString])
-> Maybe (ByteString, ByteString, ByteString, [ByteString])
forall a. a -> Maybe a
Just (ByteString -> ByteString
toLazy ByteString
a,ByteString -> ByteString
toLazy ByteString
b,ByteString -> ByteString
toLazy ByteString
c,(ByteString -> ByteString) -> [ByteString] -> [ByteString]
forall a b. (a -> b) -> [a] -> [b]
map ByteString -> ByteString
toLazy [ByteString]
ds))

unusedOffset :: Int
unusedOffset :: Int
unusedOffset = RegOffset -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral RegOffset
unusedRegOffset