module Text.Regex.PCRE.Heavy (
(=~)
, scan
, scanO
, scanRanges
, scanRangesO
, sub
, subO
, gsub
, gsubO
, split
, splitO
, re
, mkRegexQQ
, Regex
, PCREOption
, PCRE.compileM
, rawMatch
, rawSub
) where
import Language.Haskell.TH hiding (match)
import Language.Haskell.TH.Quote
import Language.Haskell.TH.Syntax
import qualified Text.Regex.PCRE.Light as PCRE
import Text.Regex.PCRE.Light.Base
import Control.Applicative ((<$>))
import Data.Maybe (isJust, fromMaybe)
import Data.List (unfoldr, mapAccumL)
import Data.Stringable
import qualified Data.ByteString.Char8 as BS
import qualified Data.ByteString.Internal as BS
import System.IO.Unsafe (unsafePerformIO)
import Foreign
substr :: BS.ByteString -> (Int, Int) -> BS.ByteString
substr s (f, t) = BS.take (t f) . BS.drop f $ s
behead :: [a] -> (a, [a])
behead (h:t) = (h, t)
behead [] = error "no head to behead"
reMatch :: Stringable a => Regex -> a -> Bool
reMatch r s = isJust $ PCRE.match r (toByteString s) []
(=~) :: Stringable a => a -> Regex -> Bool
(=~) = flip reMatch
rawMatch :: Regex -> BS.ByteString -> Int -> [PCREExecOption] -> Maybe [(Int, Int)]
rawMatch r@(Regex pcreFp _) s offset opts = unsafePerformIO $ do
withForeignPtr pcreFp $ \pcrePtr -> do
let nCapt = PCRE.captureCount r
ovecSize = (nCapt + 1) * 3
ovecBytes = ovecSize * size_of_cint
allocaBytes ovecBytes $ \ovec -> do
let (strFp, off, len) = BS.toForeignPtr s
withForeignPtr strFp $ \strPtr -> do
results <- c_pcre_exec pcrePtr nullPtr (strPtr `plusPtr` off) (fromIntegral len) (fromIntegral offset)
(combineExecOptions opts) ovec (fromIntegral ovecSize)
if results < 0 then return Nothing
else
let loop n o acc =
if n == results then return $ Just $ reverse acc
else do
i <- peekElemOff ovec $! o
j <- peekElemOff ovec (o + 1)
loop (n + 1) (o + 2) ((fromIntegral i, fromIntegral j) : acc)
in loop 0 0 []
nextMatch :: Regex -> [PCREExecOption] -> BS.ByteString -> Int -> Maybe ([(Int, Int)], Int)
nextMatch r opts str offset =
case rawMatch r str offset opts of
Nothing -> Nothing
Just [] -> Nothing
Just ms -> Just (ms, maximum $ map snd ms)
scan :: (Stringable a) => Regex -> a -> [(a, [a])]
scan r s = scanO r [] s
scanO :: (Stringable a) => Regex -> [PCREExecOption] -> a -> [(a, [a])]
scanO r opts s = map behead $ map (fromByteString . substr str) <$> unfoldr (nextMatch r opts str) 0
where str = toByteString s
scanRanges :: (Stringable a) => Regex -> a -> [((Int, Int), [(Int, Int)])]
scanRanges r s = scanRangesO r [] s
scanRangesO :: Stringable a => Regex -> [PCREExecOption] -> a -> [((Int, Int), [(Int, Int)])]
scanRangesO r opts s = map behead $ unfoldr (nextMatch r opts str) 0
where str = toByteString s
class RegexReplacement a where
performReplacement :: BS.ByteString -> [BS.ByteString] -> a -> BS.ByteString
instance Stringable a => RegexReplacement a where
performReplacement _ _ to = toByteString to
instance Stringable a => RegexReplacement (a -> [a] -> a) where
performReplacement from groups replacer = toByteString $ replacer (fromByteString from) (map fromByteString groups)
instance Stringable a => RegexReplacement (a -> a) where
performReplacement from _ replacer = toByteString $ replacer (fromByteString from)
instance Stringable a => RegexReplacement ([a] -> a) where
performReplacement _ groups replacer = toByteString $ replacer (map fromByteString groups)
rawSub :: RegexReplacement r => Regex -> r -> BS.ByteString -> Int -> [PCREExecOption] -> Maybe (BS.ByteString, Int)
rawSub r t s offset opts =
case rawMatch r s offset opts of
Just ((begin, end):groups) ->
Just (BS.concat [ substr s (0, begin)
, performReplacement (substr s (begin, end)) (map (substr s) groups) t
, substr s (end, BS.length s)], end)
_ -> Nothing
sub :: (Stringable a, RegexReplacement r) => Regex -> r -> a -> a
sub r t s = subO r [] t s
subO :: (Stringable a, RegexReplacement r) => Regex -> [PCREExecOption] -> r -> a -> a
subO r opts t s = fromMaybe s $ fromByteString <$> fst <$> rawSub r t (toByteString s) 0 opts
gsub :: (Stringable a, RegexReplacement r) => Regex -> r -> a -> a
gsub r t s = gsubO r [] t s
gsubO :: (Stringable a, RegexReplacement r) => Regex -> [PCREExecOption] -> r -> a -> a
gsubO r opts t s = fromByteString $ loop 0 str
where str = toByteString s
loop offset acc =
case rawSub r t acc offset opts of
Just (result, newOffset) -> loop newOffset result
_ -> acc
split :: Stringable a => Regex -> a -> [a]
split r s = splitO r [] s
splitO :: Stringable a => Regex -> [PCREExecOption] -> a -> [a]
splitO r opts s = map fromByteString $ map' (substr str) partRanges
where map' f = foldr ((:) . f) [f (lastL, BS.length str)]
(lastL, partRanges) = mapAccumL invRange 0 ranges
invRange acc (xl, xr) = (xr, (acc, xl))
ranges = map fst $ scanRangesO r opts str
str = toByteString s
instance Lift PCREOption where
lift o = let o' = show o in [| read o' :: PCREOption |]
quoteExpRegex :: [PCREOption] -> String -> ExpQ
quoteExpRegex opts txt = [| PCRE.compile (toByteString (txt :: String)) opts |]
where !_ = PCRE.compile (toByteString txt) opts
mkRegexQQ :: [PCREOption] -> QuasiQuoter
mkRegexQQ opts = QuasiQuoter
{ quoteExp = quoteExpRegex opts
, quotePat = undefined
, quoteType = undefined
, quoteDec = undefined }
re :: QuasiQuoter
re = mkRegexQQ [utf8]