{-# OPTIONS_GHC -fglasgow-exts -fffi -fno-warn-unused-imports #-} -- | http://laurikari.net/tre/index.html http://laurikari.net/tre/api.html -- -- This will fail or error only if allocation fails or a nullPtr is passed in. {- Copyright : (c) Chris Kuklewicz 2007 -} module Text.Regex.TRE.Wrap( -- ** High-level interface Regex, CompOption(CompOption), ExecOption(ExecOption), (=~), (=~~), -- ** Low-level interface RegOffset, ReturnCode(ReturnCode), WrapError, wrapCompile, wrapTest, wrapMatch, wrapMatchAll, wrapCount, -- ** Miscellaneous getVersion, getNumSubs, unusedRegOffset, -- ** CompOption values compBlank, compExtended, -- use extended regex syntax compIgnoreCase, -- ignore case when matching compNoSub, -- no substring matching needed compNewline, -- '.' doesn't match newline compRightAssoc, -- flip from left to right assoc -- ** ExecOption values execBlank, execNotBOL, -- not at begining of line execNotEOL, -- not at end of line -- ** ReturnCode values retOk, retBadbr, retBadpat, retBadrpt, retEcollate, retEctype, retEescape, retEsubreg, retEbrack, retEparen, retEbrace, retErange, retEspace ) where #if defined(HAVE_TRE_H) import Control.Monad(when) import Data.Int import Data.Array(listArray) import Data.Bits(Bits((.|.))) -- ((.&.),(.|.),complement)) import Foreign import Foreign.C(CInt,CChar,CSize) import Foreign.C.String(CString,CStringLen,peekCString) import Text.Regex.Base.RegexLike(RegexOptions(..),RegexMaker(..),RegexContext(..),MatchArray) #else import Data.Array(Array) import Data.Bits(Bits) import Foreign(ForeignPtr) import Foreign.C(CInt) import Foreign.C.String(CString,CStringLen) import Text.Regex.Base.RegexLike(RegexOptions(..),RegexMaker(..),RegexContext(..),MatchArray) #endif -- | return version of libtre used or Nothing if libtre is not available. getVersion :: Maybe String type CRegMatch = () -- dummy regmatch_t used below to read out so and eo values type Regex_t = () -- regex_t placeholder type RegOffset = (#type regoff_t) newtype CompOption = CompOption CInt deriving (Eq,Show,Num,Bits) newtype ExecOption = ExecOption CInt deriving (Eq,Show,Num,Bits) newtype ReturnCode = ReturnCode CInt deriving (Eq,Show) -- | A compiled regular expression data Regex = Regex (ForeignPtr Regex_t) CompOption ExecOption type WrapError = (ReturnCode,String) wrapCompile :: CompOption -- ^ Flags (summed together) -> ExecOption -- ^ Flags (summed together) -> CStringLen -- ^ The regular expression to compile -> IO (Either WrapError Regex) -- ^ Returns: an error offset and string or the compiled regular expression wrapTest :: Regex -- ^ Compiled regular expression -> CStringLen -- ^ String to match against and length in bytes -> IO (Either WrapError Bool) wrapMatch :: Regex -- ^ Compiled regular expression -> CStringLen -- ^ String to match against and length in bytes -> IO (Either WrapError (Maybe [(RegOffset,RegOffset)])) -- ^ Returns: 'Right Nothing' if the regex did not match the -- string, or: -- 'Right Just' an array of (offset,length) pairs where index 0 is whole match, and the rest are the captured subexpressions, or: -- 'Left WrapError' if there is some strange error wrapMatchAll :: Regex -> CStringLen -> IO (Either WrapError [ MatchArray ]) wrapCount :: Regex -> CStringLen -> IO (Either WrapError Int) getNumSubs :: Regex -> Int (=~) :: (RegexMaker Regex CompOption ExecOption source,RegexContext Regex source1 target) => source1 -> source -> target (=~~) :: (RegexMaker Regex CompOption ExecOption source,RegexContext Regex source1 target,Monad m) => source1 -> source -> m target compBlank :: CompOption execBlank :: ExecOption unusedRegOffset :: RegOffset retOk :: ReturnCode #if defined(HAVE_TRE_H) #include compBlank = CompOption 0 execBlank = ExecOption 0 unusedRegOffset = (-1) retOk = ReturnCode 0 fi :: (Integral i,Num n ) => i -> n fi x = fromIntegral x {-# INLINE getNumSubs #-} getNumSubs (Regex r _ _) = fi . unsafePerformIO $ withForeignPtr r getNumSubs' getNumSubs' :: Ptr Regex_t -> IO CSize {-# INLINE getNumSubs' #-} getNumSubs' x = (#peek regex_t,re_nsub) x size_Regex_t :: Int size_Regex_t = (#size regex_t) instance RegexOptions Regex CompOption ExecOption where blankCompOpt = compBlank blankExecOpt = execBlank defaultCompOpt = compExtended .|. compNewline defaultExecOpt = execBlank setExecOpts e' (Regex r c _) = Regex r c e' getExecOpts (Regex _ _ e) = e -- (=~) :: (RegexMaker Regex CompOption ExecOption source,RegexContext Regex source1 target) => source1 -> source -> target (=~) x r = let q :: Regex q = makeRegex r in match q x -- (=~~) ::(RegexMaker Regex CompOption ExecOption source,RegexContext Regex source1 target,Monad m) => source1 -> source -> m target (=~~) x r = do (q :: Regex) <- makeRegexM r matchM q x nullTest :: Ptr a -> String -> IO (Either WrapError b) -> IO (Either WrapError b) {-# INLINE nullTest #-} nullTest ptr msg io = do if nullPtr == ptr then return (Left (retOk,"Ptr parameter was nullPtr in Text.Regex.TRE.Wrap."++msg)) else io wrapRC :: ReturnCode -> IO (Either WrapError b) {-# INLINE wrapRC #-} wrapRC r = return (Left (r,"Error in Text.Regex.TRE.Wrap: "++show r)) -- | Compiles a regular expression wrapCompile flags e (pattern,len) = do nullTest pattern "wrapCompile pattern" $ do fregex <- newForeignPtr finalizerFree =<< mallocBytes size_Regex_t withForeignPtr fregex $ \regex -> do if regex == nullPtr then return (Left (retOk,"Could not malloc regex in Text.Regex.TRE.Wrap.wrapCompile")) else do ret <- c_regncomp regex pattern (fi len) flags if ret == retOk then return (Right (Regex fregex flags e)) else wrapRC ret wrapTest (Regex fregex _ flags) (cstr,len) = do nullTest cstr "wrapTest cstr" $ do withForeignPtr fregex $ \regex -> do r@(ReturnCode r') <- c_regnexec regex cstr (fi len) 0 nullPtr flags if r == retNoMatch then return (Right False) else if r' < 0 then wrapRC r else return (Right True) -- | Matches a regular expression against a string wrapMatch (Regex fregex _ flags) (cstr,len) = do nullTest cstr "wrapMatch cstr" $ do withForeignPtr fregex $ \regex -> do nsub <- getNumSubs' regex let nmatch = 1 + fi nsub pmatch_bytes = nmatch * (#size regmatch_t) allocaBytes pmatch_bytes $ \pmatch -> do r@(ReturnCode r') <- c_regnexec regex cstr (fi len) (succ nsub) pmatch flags if r == retNoMatch then return (Right Nothing) else if r' < 0 then wrapRC r else do regions <- mapM getOffsets . take nmatch . iterate (`plusPtr` (#size regmatch_t)) $ pmatch return (Right (Just regions)) -- regions will not be [] -- | wrapMatchAll is an improvement over wrapMatch since it only -- allocates memory with allocaBytes once at the start. -- -- wrapMatchAll (Regex fregex _ flags) full_source = do nullTest (fst full_source) "wrapMatchAll source" $ do withForeignPtr fregex $ \regex -> do nsub <- getNumSubs' regex let nmatch = 1 + fi nsub pmatch_bytes = (nmatch) * (#size regmatch_t) flags' = (execNotBOL .|. flags) allocaBytes pmatch_bytes $ \pmatch -> let loop acc flags_in_use (source,len) pos | pos `seq` len `seq` source `seq` False = undefined | otherwise = do r@(ReturnCode r') <- c_regnexec regex source (fi len) (succ nsub) pmatch flags_in_use if r == retNoMatch then return (Right (acc [])) else if r' < 0 then wrapRC r else do start_ends <- mapM getOffsets . take nmatch . iterate (`plusPtr` (#size regmatch_t)) $ pmatch let start_offs = map (\(s,e) -> (pos + fi s,fi (e-s))) start_ends :: [(Int,Int)] arr = listArray (0,fi nsub) start_offs acc' = acc . (arr:) delta = fi (snd (head start_ends)) :: Int pos' = pos + delta :: Int source' = plusPtr source delta :: CString len' = len - delta :: Int if (arr `seq` delta) == 0 then return (Right (acc' [])) else loop acc' flags' (source',len') pos' in loop id flags full_source 0 getOffsets :: Ptr CRegMatch -> IO (RegOffset,RegOffset) getOffsets p_match = do start <- (#peek regmatch_t, rm_so) p_match :: IO RegOffset end <- (#peek regmatch_t, rm_eo) p_match :: IO RegOffset return (start,end) wrapCount (Regex fregex _ flags) (in_source,in_len) = do nullTest in_source "wrapCount source" $ do withForeignPtr fregex $ \regex -> allocaBytes (#size regmatch_t) $ \pmatch -> do nullTest pmatch "wrapCount pmatch" $ let flags' = (execNotBOL .|. flags) loop flags_in_use (source,len) count | count `seq` False = undefined | otherwise = do r@(ReturnCode r') <- c_regnexec regex source len 1 pmatch flags_in_use if r == retNoMatch then return (Right count) else if r'<0 then wrapRC r else do (start,end) <- getOffsets pmatch -- (start == unusedRegOffset) check omitted let len' = len - fi end source' = plusPtr source (fi end) if end > start then loop flags' (source',len') (succ count) else return (Right (succ count)) in loop flags (in_source,fi in_len) 0 getVersion = unsafePerformIO $ do version <- c_tre_version if version == nullPtr then return (Just "tre_version was null") else return . Just =<< peekCString version foreign import ccall unsafe "tre/regex.h regncomp" c_regncomp :: Ptr Regex_t -> CString -> CSize -> CompOption -> IO ReturnCode foreign import ccall unsafe "tre/regex.h tre_version" c_tre_version :: IO (Ptr CChar) foreign import ccall unsafe "tre/regex.h regnexec" c_regnexec :: Ptr Regex_t -> CString -> CSize -> CSize -> Ptr CRegMatch -> ExecOption -> IO ReturnCode {- newtype InfoWhat = InfoWhat CInt deriving (Eq,Show) newtype ConfigWhat = ConfigWhat CInt deriving (Eq,Show) foreign import ccall unsafe "tre/regex.h tre_config" c_tre_config :: ConfigWhat -> Ptr a -> IO ReturnCode -} -- Flags for regexec #enum ExecOption,ExecOption, \ execNotBOL = REG_NOTBOL, \ execNotEOL = REG_NOTEOL -- Flags for regcomp #enum CompOption,CompOption, \ compExtended = REG_EXTENDED, \ compIgnoreCase = REG_ICASE, \ compNoSub = REG_NOSUB, \ compNewline = REG_NEWLINE, \ compRightAssoc = REG_RIGHT_ASSOC -- Return values from regexec (REG_NOMATCH, REG_ESPACE,...) -- Error codes from regcomp (not REG_NOMATCH) -- Though calling retNoMatch an error is rather missing the point... #enum ReturnCode,ReturnCode, \ retNoMatch = REG_NOMATCH, \ retBadbr = REG_BADBR, \ retBadpat = REG_BADPAT, \ retBadrpt = REG_BADRPT, \ retEcollate = REG_ECOLLATE, \ retEctype = REG_ECTYPE, \ retEescape = REG_EESCAPE, \ retEsubreg = REG_ESUBREG, \ retEbrack = REG_EBRACK, \ retEparen = REG_EPAREN, \ retEbrace = REG_EBRACE, \ retErange = REG_ERANGE, \ retEspace = REG_ESPACE #else /* do not HAVE_TRE_H */ instance RegexOptions Regex CompOption ExecOption where blankCompOpt = err blankExecOpt = err defaultCompOpt = err defaultExecOpt = err getExecOpts = err setExecOpts = err msg :: String msg = "WrapTre.hsc was not compiled against libtre regex library with HAVE_TRE_H defined" err :: a err = error msg (=~) = err (=~~) = err -- Hack to avoid the constructor from being unused wrapCompile _ _ _ = err >> return (Right (Regex err err err)) wrapTest = err wrapMatch = err wrapMatchAll = err wrapCount = err compExtended,compIgnoreCase,compNoSub,compNewline :: CompOption compBlank = err compExtended = err compIgnoreCase = err compNoSub = err compNewline = err execNotBOL,execNotEOL :: ExecOption execBlank = err execNotBOL = err execNotEOL = err retBadbr, retBadpat, retBadrpt, retEcollate, retEctype, retEescape, retEsubreg, retEbrack, retEparen, retEbrace, retErange, retEspace :: ReturnCode retBadbr = err retBadpat = err retBadrpt = err retEcollate = err retEctype = err retEescape = err retEsubreg = err retEbrack = err retEparen = err retEbrace = err retErange = err retEspace = err getVersion = Nothing #endif /* HAVE_TRE_H */