module Text.Regex.PCRE.Heavy (
(=~)
, (≈)
, scan
, scanO
, scanRanges
, scanRangesO
, sub
, subO
, gsub
, gsubO
, split
, splitO
, re
, mkRegexQQ
, Regex
, PCREOption
, PCRE.compileM
, rawMatch
, rawSub
) where
#if __GLASGOW_HASKELL__ < 709
import Control.Applicative ((<$>))
#endif
import Language.Haskell.TH hiding (match)
import Language.Haskell.TH.Quote
import Language.Haskell.TH.Syntax
import qualified Text.Regex.PCRE.Light as PCRE
import Text.Regex.PCRE.Light.Base
import Data.Maybe (isJust, fromMaybe)
import Data.List (unfoldr, mapAccumL)
import Data.Stringable
import qualified Data.ByteString.Char8 as BS
import qualified Data.ByteString.Internal as BS
import System.IO.Unsafe (unsafePerformIO)
import Foreign
import Debug.Trace
substr ∷ BS.ByteString → (Int, Int) → BS.ByteString
substr s (f, t) = BS.take (t f) . BS.drop f $ s
behead ∷ [a] → (a, [a])
behead (h:t) = (h, t)
behead [] = error "no head to behead"
reMatch ∷ Stringable a ⇒ Regex → a → Bool
reMatch r s = isJust $ PCRE.match r (toByteString s) []
(=~) ∷ Stringable a ⇒ a → Regex → Bool
(=~) = flip reMatch
(≈) ∷ Stringable a ⇒ a → Regex → Bool
(≈) = (=~)
rawMatch ∷ Regex → BS.ByteString → Int → [PCREExecOption] → Maybe [(Int, Int)]
rawMatch r@(Regex pcreFp _) s offset opts = unsafePerformIO $ do
withForeignPtr pcreFp $ \pcrePtr → do
let nCapt = PCRE.captureCount r
ovecSize = (nCapt + 1) * 3
ovecBytes = ovecSize * size_of_cint
allocaBytes ovecBytes $ \ovec → do
let (strFp, off, len) = BS.toForeignPtr s
withForeignPtr strFp $ \strPtr → do
results ← c_pcre_exec pcrePtr nullPtr (strPtr `plusPtr` off) (fromIntegral len) (fromIntegral offset)
(combineExecOptions opts) ovec (fromIntegral ovecSize)
if results < 0 then return Nothing
else
let loop n o acc =
if n == results then return $ Just $ reverse acc
else do
i ← peekElemOff ovec $! o
j ← peekElemOff ovec (o + 1)
loop (n + 1) (o + 2) ((fromIntegral i, fromIntegral j) : acc)
in loop 0 0 []
nextMatch ∷ Regex → [PCREExecOption] → BS.ByteString → Int → Maybe ([(Int, Int)], Int)
nextMatch r opts str offset =
case rawMatch r str offset opts of
Nothing → Nothing
Just [] → Nothing
Just ms → Just (ms, maximum $ map snd ms)
scan ∷ (Stringable a) ⇒ Regex → a → [(a, [a])]
scan r s = scanO r [] s
scanO ∷ (Stringable a) ⇒ Regex → [PCREExecOption] → a → [(a, [a])]
scanO r opts s = map behead $ map (fromByteString . substr str) <$> unfoldr (nextMatch r opts str) 0
where str = toByteString s
scanRanges ∷ (Stringable a) ⇒ Regex → a → [((Int, Int), [(Int, Int)])]
scanRanges r s = scanRangesO r [] s
scanRangesO ∷ Stringable a ⇒ Regex → [PCREExecOption] → a → [((Int, Int), [(Int, Int)])]
scanRangesO r opts s = map behead $ unfoldr (nextMatch r opts str) 0
where str = toByteString s
class RegexReplacement a where
performReplacement ∷ BS.ByteString → [BS.ByteString] → a → BS.ByteString
instance Stringable a ⇒ RegexReplacement a where
performReplacement _ _ to = toByteString to
instance Stringable a ⇒ RegexReplacement (a → [a] → a) where
performReplacement from groups replacer = toByteString $ replacer (fromByteString from) (map fromByteString groups)
instance Stringable a ⇒ RegexReplacement (a → a) where
performReplacement from _ replacer = toByteString $ replacer (fromByteString from)
instance Stringable a ⇒ RegexReplacement ([a] → a) where
performReplacement _ groups replacer = toByteString $ replacer (map fromByteString groups)
rawSub ∷ RegexReplacement r ⇒ Regex → r → BS.ByteString → Int → [PCREExecOption] → Maybe (BS.ByteString, Int)
rawSub r t s offset opts =
case rawMatch r s offset opts of
Just ((begin, end):groups) →
let replacement = performReplacement (substr s (begin, end)) (map (substr s) groups) t in
Just (BS.concat [ substr s (0, begin)
, replacement
, substr s (end, BS.length s)], begin + BS.length replacement)
_ → Nothing
sub ∷ (Stringable a, RegexReplacement r) ⇒ Regex → r → a → a
sub r t s = subO r [] t s
subO ∷ (Stringable a, RegexReplacement r) ⇒ Regex → [PCREExecOption] → r → a → a
subO r opts t s = fromMaybe s $ fromByteString <$> fst <$> rawSub r t (toByteString s) 0 opts
gsub ∷ (Stringable a, RegexReplacement r) ⇒ Regex → r → a → a
gsub r t s = gsubO r [] t s
gsubO ∷ (Stringable a, RegexReplacement r) ⇒ Regex → [PCREExecOption] → r → a → a
gsubO r opts t s = fromByteString $ loop 0 str
where str = toByteString s
loop offset acc =
case rawSub r t acc offset opts of
Just (result, newOffset) →
if newOffset == offset then acc else loop newOffset result
_ → acc
split ∷ Stringable a ⇒ Regex → a → [a]
split r s = splitO r [] s
splitO ∷ Stringable a ⇒ Regex → [PCREExecOption] → a → [a]
splitO r opts s = map fromByteString $ map' (substr str) partRanges
where map' f = foldr ((:) . f) [f (lastL, BS.length str)]
(lastL, partRanges) = mapAccumL invRange 0 ranges
invRange acc (xl, xr) = (xr, (acc, xl))
ranges = map fst $ scanRangesO r opts str
str = toByteString s
instance Lift PCREOption where
lift o = let o' = show o in [| read o' ∷ PCREOption |]
quoteExpRegex ∷ [PCREOption] → String → ExpQ
quoteExpRegex opts txt = [| PCRE.compile (toByteString (txt ∷ String)) opts |]
where !_ = PCRE.compile (toByteString txt) opts
mkRegexQQ ∷ [PCREOption] → QuasiQuoter
mkRegexQQ opts = QuasiQuoter
{ quoteExp = quoteExpRegex opts
, quotePat = undefined
, quoteType = undefined
, quoteDec = undefined }
re ∷ QuasiQuoter
re = mkRegexQQ [utf8]