{-# OPTIONS_GHC -fno-warn-orphans #-} -- | -- Copyright: (c) 2006-2017, Chris Dornan and Christopher Kuklewicz -- SPDX-License-Identifier: BSD-3-Clause -- -- This exports instances of the high level API and the medium level API of 'compile','execute', and 'regexec'. -- -- @since 0.95.0 module Text.Regex.PCRE.Text ( -- * Types Regex , MatchOffset , MatchLength , CompOption(CompOption) , ExecOption(ExecOption) , ReturnCode , WrapError -- * Miscellaneous , unusedOffset , getVersion -- * Medium level API functions , compile , execute , regexec -- * CompOption flags , compBlank , compAnchored , compAutoCallout , compCaseless , compDollarEndOnly , compDotAll , compExtended , compExtra , compFirstLine , compMultiline , compNoAutoCapture , compUngreedy , compUTF8 , compNoUTF8Check -- * ExecOption flags , execBlank , execAnchored , execNotBOL , execNotEOL , execNotEmpty , execNoUTF8Check , execPartial ) where import Data.Array(Array,listArray) import qualified Data.ByteString as B import qualified Data.ByteString.Unsafe as B import qualified Data.Text as T import qualified Data.Text.Encoding as T import Foreign.C.String(CStringLen,CString) import Foreign(nullPtr) import System.IO.Unsafe (unsafePerformIO) import Text.Regex.Base.Impl import Text.Regex.Base.RegexLike import Text.Regex.PCRE.Wrap instance RegexContext Regex T.Text T.Text where match = polymatch matchM = polymatchM instance RegexMaker Regex CompOption ExecOption T.Text where makeRegexOpts c e pat = unsafePerformIO $ compile c e pat >>= unwrap makeRegexOptsM c e pat = either (fail.show) return $ unsafePerformIO $ compile c e pat instance RegexLike Regex T.Text where matchTest re tx = unsafePerformIO $ asCStringLen tx (wrapTest 0 re) >>= unwrap matchOnce re tx = unsafePerformIO $ execute re tx >>= unwrap matchAll re tx = unsafePerformIO $ asCStringLen tx (wrapMatchAll re) >>= unwrap matchCount re tx = unsafePerformIO $ asCStringLen tx (wrapCount re) >>= unwrap -- --------------------------------------------------------------------- -- | Compiles a regular expression compile :: CompOption -- ^ (summed together) -> ExecOption -- ^ (summed together) -> T.Text -- ^ The regular expression to compile -> IO (Either (MatchOffset,String) Regex) -- ^ Returns: the compiled regular expression compile c e pat = -- PCRE does not allow one to specify a length for the regular expression, it must by 0 terminated asCString pat $ wrapCompile c e -- --------------------------------------------------------------------- -- | Matches a regular expression against a string execute :: Regex -- ^ Compiled regular expression -> T.Text -- ^ Text to match against -> IO (Either WrapError (Maybe (Array Int (MatchOffset,MatchLength)))) -- ^ Returns: 'Nothing' if the regex did not match the -- string, or 'Just' an array of (offset,length) pairs -- where index 0 is whole match, and the rest are the -- captured subexpressions. execute re tx = do maybeStartEnd <- asCStringLen tx (wrapMatch 0 re) case maybeStartEnd of Right Nothing -> return $ Right Nothing Right (Just parts) -> return $ Right $ Just $ listArray (0,pred $ length parts) [ (s,e-s) | (s,e) <- parts ] Left err -> return $ Left err -- --------------------------------------------------------------------- -- | Matches a regular expression against a string regexec :: Regex -- ^ Compiled regular expression -> T.Text -- ^ Text to match against -> IO (Either WrapError (Maybe (T.Text, T.Text, T.Text, [T.Text]))) -- ^ Returns: 'Nothing' if the regex did not match the -- string, or 'Just' text including before and after text regexec re tx = do mb <- asCStringLen tx $ wrapMatch 0 re case mb of Right Nothing -> return (Right Nothing) Right (Just parts) -> return . Right . Just . matchedParts $ parts Left err -> return (Left err) where matchedParts [] = (T.empty,T.empty,tx,[]) -- no information matchedParts (mtchd@(start,stop):rst) = ( T.take start tx , getSub mtchd , T.drop stop tx , map getSub rst ) getSub (start,stop) | start == unusedOffset = T.empty | otherwise = T.take (stop-start) . T.drop start $ tx -- --------------------------------------------------------------------- -- helpers unwrap :: (Show e) => Either e v -> IO v unwrap x = case x of Left e -> fail $ "Text.Regex.PCRE.Text died: " ++ show e Right v -> return v {-# INLINE asCString #-} asCString :: T.Text -> (CString->IO a) -> IO a asCString = B.unsafeUseAsCString . T.encodeUtf8 {-# INLINE asCStringLen #-} asCStringLen :: T.Text -> (CStringLen->IO a) -> IO a asCStringLen s op = B.unsafeUseAsCStringLen (T.encodeUtf8 s) checked where checked cs@(ptr,_) | ptr == nullPtr = B.unsafeUseAsCStringLen myEmpty $ op . trim | otherwise = op cs trim (ptr,_) = (ptr,0) myEmpty :: B.ByteString myEmpty = B.pack [0]