module Data.Text.ICU.Regex.Internal
(
MatchOption(..)
, Haystack(..)
, Regex(..)
, URegularExpression
, emptyForeignPtr
, regex
, uregex_clone
, uregex_close
, uregex_end
, uregex_find
, uregex_findNext
, uregex_getText
, uregex_group
, uregex_groupCount
, uregex_pattern
, uregex_setText
, uregex_start
) where
import Control.Monad (when)
import Data.IORef (IORef, newIORef)
import Data.Int (Int32)
import Data.Text (Text)
import qualified Data.Text as T
import qualified Data.Text.Foreign as T
import Data.Text.ICU.Internal (UBool, UChar)
import Data.Text.ICU.Error (isRegexError)
import Data.Text.ICU.Error.Internal (UParseError, UErrorCode,
handleError, handleParseError)
import Data.Typeable (Typeable)
import Data.Word (Word16, Word32)
import Foreign.ForeignPtr (ForeignPtr, newForeignPtr, touchForeignPtr, withForeignPtr)
import Foreign.Ptr (FunPtr, Ptr)
import Prelude hiding (catch)
import System.IO.Unsafe (unsafePerformIO)
data MatchOption
= CaseInsensitive
| Comments
| DotAll
| Literal
| Multiline
| HaskellLines
| UnicodeWord
| ErrorOnUnknownEscapes
| WorkLimit Int
| StackLimit Int
deriving (Eq, Show, Typeable)
data Haystack = H (ForeignPtr Word16) !T.I16
data Regex = Regex {
reRe :: ForeignPtr URegularExpression
, reText :: IORef Haystack
}
emptyForeignPtr :: ForeignPtr Word16
emptyForeignPtr = unsafePerformIO $ fst `fmap` T.asForeignPtr T.empty
regex :: [MatchOption] -> Text -> IO Regex
regex opts pat = T.useAsPtr pat $ \pptr plen -> do
let (flags,workLimit,stackLimit) = toURegexpOpts opts
ptr <- handleParseError isRegexError $
uregex_open pptr (fromIntegral plen) flags
refp <- newForeignPtr uregex_close ptr
(hayfp, hayLen) <- T.asForeignPtr T.empty
withForeignPtr refp $ \rePtr ->
withForeignPtr hayfp $ \hayPtr -> handleError $
uregex_setText rePtr hayPtr (fromIntegral hayLen)
when (workLimit > 1) .
handleError $ uregex_setTimeLimit ptr (fromIntegral workLimit)
when (stackLimit > 1) .
handleError $ uregex_setStackLimit ptr (fromIntegral stackLimit)
touchForeignPtr refp
Regex refp `fmap` newIORef (H hayfp 0)
data URegularExpression
type URegexpFlag = Word32
toURegexpOpts :: [MatchOption] -> (URegexpFlag,Int,Int)
toURegexpOpts = foldl go (0,1,1)
where
go (!flag,work,stack) opt = (flag+flag',work',stack')
where
flag' = case opt of
CaseInsensitive -> 2
Comments -> 4
DotAll -> 32
Literal -> 16
Multiline -> 8
HaskellLines -> 1
UnicodeWord -> 256
ErrorOnUnknownEscapes -> 512
_ -> 0
work' = case opt of
WorkLimit limit -> limit
_ -> work
stack' = case opt of
StackLimit limit -> limit
_ -> stack
foreign import ccall unsafe "hs_text_icu.h __hs_uregex_open" uregex_open
:: Ptr UChar -> Int32 -> Word32 -> Ptr UParseError -> Ptr UErrorCode
-> IO (Ptr URegularExpression)
foreign import ccall unsafe "hs_text_icu.h &__hs_uregex_close" uregex_close
:: FunPtr (Ptr URegularExpression -> IO ())
foreign import ccall unsafe "hs_text_icu.h __hs_uregex_clone" uregex_clone
:: Ptr URegularExpression -> Ptr UErrorCode
-> IO (Ptr URegularExpression)
foreign import ccall unsafe "hs_text_icu.h __hs_uregex_pattern" uregex_pattern
:: Ptr URegularExpression -> Ptr Int32 -> Ptr UErrorCode
-> IO (Ptr UChar)
foreign import ccall unsafe "hs_text_icu.h __hs_uregex_setText" uregex_setText
:: Ptr URegularExpression -> Ptr UChar -> Int32 -> Ptr UErrorCode -> IO ()
foreign import ccall unsafe "hs_text_icu.h __hs_uregex_getText" uregex_getText
:: Ptr URegularExpression -> Ptr Int32 -> Ptr UErrorCode -> IO (Ptr UChar)
foreign import ccall unsafe "hs_text_icu.h __hs_uregex_find" uregex_find
:: Ptr URegularExpression -> Int32 -> Ptr UErrorCode -> IO UBool
foreign import ccall unsafe "hs_text_icu.h __hs_uregex_findNext" uregex_findNext
:: Ptr URegularExpression -> Ptr UErrorCode -> IO UBool
foreign import ccall unsafe "hs_text_icu.h __hs_uregex_start" uregex_start
:: Ptr URegularExpression -> Int32 -> Ptr UErrorCode -> IO Int32
foreign import ccall unsafe "hs_text_icu.h __hs_uregex_end" uregex_end
:: Ptr URegularExpression -> Int32 -> Ptr UErrorCode -> IO Int32
foreign import ccall unsafe "hs_text_icu.h __hs_uregex_groupCount" uregex_groupCount
:: Ptr URegularExpression -> Ptr UErrorCode -> IO Int32
foreign import ccall unsafe "hs_text_icu.h __hs_uregex_group" uregex_group
:: Ptr URegularExpression -> Int32 -> Ptr UChar -> Int32 -> Ptr UErrorCode
-> IO Int32
foreign import ccall unsafe "hs_text_icu.h __hs_uregex_setTimeLimit" uregex_setTimeLimit
:: Ptr URegularExpression -> Int32 -> Ptr UErrorCode -> IO ()
foreign import ccall unsafe "hs_text_icu.h __hs_uregex_setStackLimit" uregex_setStackLimit
:: Ptr URegularExpression -> Int32 -> Ptr UErrorCode -> IO ()