module Text.Regex.PCRE.ByteString.Utils
(
substitute
, split
, substituteCompile
, splitCompile
, Regex
, CompOption
, ExecOption
, compBlank
, execBlank
, compile'
, execute'
, substitute'
, split'
, substituteCompile'
, splitCompile'
) where
import Text.Regex.PCRE.ByteString
import qualified Data.ByteString.Char8 as BS
import Control.Monad.Except
import Data.Attoparsec.ByteString.Char8 as A
import Control.Applicative
import Data.Char (digitToInt)
import System.IO.Unsafe
import qualified Data.Array as A
substitute :: Regex
-> BS.ByteString
-> BS.ByteString
-> IO (Either String BS.ByteString)
substitute regexp srcstring repla = runExceptT $ do
parsedReplacement <- case parseOnly repparser repla of
Right y -> return y
Left rr -> throwError (rr ++ " when parsing the replacement string")
matches <- getMatches regexp srcstring
let applyReplacement :: RegexpSplit BS.ByteString -> BS.ByteString
applyReplacement (Unmatched x) = x
applyReplacement (Matched captured mcaptures) = applyCaptures captured parsedReplacement mcaptures
return $! BS.concat $! map applyReplacement matches
applyCaptures :: BS.ByteString -> [Replacement] -> [BS.ByteString] -> BS.ByteString
applyCaptures firstmatch repl mcaptures = BS.concat (map applyCaptures' repl)
where
ncaptures = length mcaptures
applyCaptures' :: Replacement -> BS.ByteString
applyCaptures' WholeMatch = firstmatch
applyCaptures' (RawReplacement r) = r
applyCaptures' (IndexedReplacement idx)
| idx > ncaptures || idx < 0 = ""
| otherwise = mcaptures !! (idx 1)
split :: Regex
-> BS.ByteString
-> IO (Either String [BS.ByteString])
split regexp srcstring = fmap (removeEmptyLeft . regexpUnmatched) <$> runExceptT (getMatches regexp srcstring)
where
removeEmptyLeft = reverse . dropWhile BS.null . reverse
substitute' :: Regex
-> BS.ByteString
-> BS.ByteString
-> Either String BS.ByteString
substitute' regexp srcstring repla = unsafePerformIO (substitute regexp srcstring repla)
split' :: Regex
-> BS.ByteString
-> Either String [BS.ByteString]
split' regexp srcstring = unsafePerformIO (split regexp srcstring)
data RegexpSplit a = Matched a [a]
| Unmatched a
deriving (Show, Eq, Ord)
instance Functor RegexpSplit where
fmap f (Matched x y) = Matched (f x) (map f y)
fmap f (Unmatched x) = Unmatched (f x)
regexpAll :: [RegexpSplit a] -> [a]
regexpAll = map unreg
where
unreg ( Matched x _ ) = x
unreg ( Unmatched x ) = x
isMatched :: RegexpSplit a -> Bool
isMatched (Matched _ _) = True
isMatched _ = False
regexpUnmatched :: [RegexpSplit a] -> [a]
regexpUnmatched = regexpAll . filter (not . isMatched)
getMatches :: Regex -> BS.ByteString -> ExceptT String IO [RegexpSplit BS.ByteString]
getMatches _ "" = return []
getMatches creg src = do
x <- liftIO $ regexec creg src
case x of
Left (rcode, rerror) -> throwError ("Regexp application error: " ++ rerror ++ "(" ++ show rcode ++ ")")
Right Nothing -> return [Unmatched src]
Right (Just ("","",rm,_)) -> return (map (Unmatched . BS.singleton) (BS.unpack rm))
Right (Just (before,current,remaining,captures)) -> do
remain <- getMatches creg remaining
return (Unmatched before : Matched current captures : remain)
data Replacement = RawReplacement BS.ByteString
| IndexedReplacement Int
| WholeMatch
deriving (Show)
repparser :: Parser [Replacement]
repparser = many replacement <* endOfInput
replacement :: Parser Replacement
replacement = fmap RawReplacement rawData <|> escapedThing
rawData :: Parser BS.ByteString
rawData = takeWhile1 (/= '\\')
escapedThing :: Parser Replacement
escapedThing = do
void (char '\\')
let ac = do
n <- anyChar
r <- rawData
return $ BS.cons n r
toReplacement 0 = WholeMatch
toReplacement n = IndexedReplacement n
fmap (toReplacement . digitToInt) digit <|> fmap (RawReplacement . BS.cons '\\') ac
substituteCompile :: BS.ByteString
-> BS.ByteString
-> BS.ByteString
-> IO (Either String BS.ByteString)
substituteCompile regexp srcstring repla = do
re <- compile compBlank execBlank regexp
case re of
Right cre -> substitute cre srcstring repla
Left rr -> return $ Left $ "Regexp compilation failed: " ++ show rr
splitCompile :: BS.ByteString
-> BS.ByteString
-> IO (Either String [BS.ByteString])
splitCompile regexp srcstring = do
re <- compile compBlank execBlank regexp
case re of
Right cre -> split cre srcstring
Left rr -> return $ Left $ "Regexp compilation failed: " ++ show rr
compile' :: CompOption -> ExecOption -> BS.ByteString -> Either (MatchOffset, String) Regex
compile' co eo s = unsafePerformIO (compile co eo s)
execute' :: Regex -> BS.ByteString -> Either WrapError (Maybe (A.Array Int (MatchOffset, MatchLength)))
execute' r s = unsafePerformIO (execute r s)
substituteCompile' :: BS.ByteString
-> BS.ByteString
-> BS.ByteString
-> Either String BS.ByteString
substituteCompile' regexp srcstring repla = unsafePerformIO (substituteCompile regexp srcstring repla)
splitCompile' :: BS.ByteString
-> BS.ByteString
-> Either String [BS.ByteString]
splitCompile' regexp srcstring = unsafePerformIO (splitCompile regexp srcstring)