module SimpleRegex ( Regex , submatches , compile , compileWithError , ByteString , regexSplit , catSubmatches ) where import qualified Text.Regex.Posix.ByteString.Lazy as R import Data.ByteString.Lazy.Char8 as L hiding (putStrLn, map, take, concat, zipWith) import Data.Either import Data.Maybe import Text.Regex.Base.RegexLike import Control.Monad import System.IO.Unsafe type Regex = R.Regex -- very simple matching functions for lazy bytestream regexp submatches :: Regex -> ByteString -> Maybe [ByteString] submatches r s = let m = unsafePerformIO (R.regexec r s) in case m of Right (Just (_,_,_,ms)) -> Just ms _ -> Nothing catSubmatches :: Regex -> [ByteString] -> [([ByteString], ByteString)] catSubmatches r ss = let sms = map (submatches r) ss in catMaybes $ zipWith (\le ri -> le >>= (\le' -> return (le', ri))) sms ss regexSplit :: Regex -> ByteString -> [ByteString] regexSplit r s = let m = unsafePerformIO (R.regexec r s) in case m of Right (Just (bef,_,aft,_)) -> if L.length bef > 0 then (bef : regexSplit r aft) else [aft] _ -> [s] compile :: String -> IO Regex compile rx = do (Right r) <- R.compile defaultCompOpt defaultExecOpt $ pack rx return r compileWithError :: String -> IO (Either String Regex) compileWithError rx = do r <- R.compile defaultCompOpt defaultExecOpt $ pack rx return $ case r of (Right r') -> Right r' (Left (rc, rs)) -> Left ("Error: " ++ rs) -- tests {- test1 = do rx <- compile ";" let teststr = L.pack ";;123;3456;234;" res = regexSplit rx teststr putStrLn (show res) test2 = do rx <- compile ";" let testlines = L.pack ";;123;3456;234;\na;;asd;;24;das" res = (map (regexSplit rx)) (L.lines testlines) putStrLn (show res) test3 = do rx <- compile ";" let ff d = catSubmatches rx d res = ff (L.lines testdata) putStrLn (show (take 200 res)) test4 = do rx <- compile " ;" let ff d = map (regexSplit rx) d res = ff (L.lines testdata) putStrLn (take 200 (show res)) test5 = do rx <- compile "" let testlines = L.pack ";;123;3456;234;\na;;asd;;24;das" res = (map (regexSplit rx)) (L.lines testlines) putStrLn (show res) testdata = L.unlines $ L.pack <$> [concat (map ((++ ";") . show) [x * 10 .. 9 + x * 10]) | x <- [1..10]] -}