module Text.Regex.PCRE.Heavy (
(=~)
, (≈)
, scan
, scanO
, scanRanges
, scanRangesO
, sub
, subO
, gsub
, gsubO
, split
, splitO
, re
, mkRegexQQ
, Regex
, PCREOption
, PCRE.compileM
, rawMatch
, rawSub
) where
#if !MIN_VERSION_base(4,8,0)
import Control.Applicative ((<$>))
#endif
import Language.Haskell.TH hiding (match)
import Language.Haskell.TH.Quote
import Language.Haskell.TH.Syntax
import qualified Text.Regex.PCRE.Light as PCRE
import Text.Regex.PCRE.Light.Base
import Data.Maybe (isJust, fromMaybe)
import Data.List (unfoldr, mapAccumL)
import Data.String.Conversions
import qualified Data.ByteString.Char8 as BS
import qualified Data.ByteString.Internal as BS
import System.IO.Unsafe (unsafePerformIO)
import Foreign (withForeignPtr, allocaBytes, nullPtr, plusPtr, peekElemOff)
substr ∷ SBS → (Int, Int) → SBS
substr s (f, t) = BS.take (t f) . BS.drop f $ s
behead ∷ [a] → (a, [a])
behead (h:t) = (h, t)
behead [] = error "no head to behead"
reMatch ∷ (ConvertibleStrings SBS a, ConvertibleStrings a SBS) ⇒ Regex → a → Bool
reMatch r s = isJust $ PCRE.match r (cs s) []
(=~), (≈) ∷ (ConvertibleStrings SBS a, ConvertibleStrings a SBS) ⇒ a → Regex → Bool
(=~) = flip reMatch
(≈) = (=~)
rawMatch ∷ Regex → SBS → Int → [PCREExecOption] → Maybe [(Int, Int)]
rawMatch r@(Regex pcreFp _) s offset opts = unsafePerformIO $ do
withForeignPtr pcreFp $ \pcrePtr → do
let nCapt = PCRE.captureCount r
ovecSize = (nCapt + 1) * 3
ovecBytes = ovecSize * size_of_cint
allocaBytes ovecBytes $ \ovec → do
let (strFp, off, len) = BS.toForeignPtr s
withForeignPtr strFp $ \strPtr → do
results ← c_pcre_exec pcrePtr nullPtr (strPtr `plusPtr` off) (fromIntegral len) (fromIntegral offset)
(combineExecOptions opts) ovec (fromIntegral ovecSize)
if results < 0 then return Nothing
else
let loop n o acc =
if n == results then return $ Just $ reverse acc
else do
i ← peekElemOff ovec $! o
j ← peekElemOff ovec (o + 1)
loop (n + 1) (o + 2) ((fromIntegral i, fromIntegral j) : acc)
in loop 0 0 []
nextMatch ∷ Regex → [PCREExecOption] → SBS → Int → Maybe ([(Int, Int)], Int)
nextMatch r opts str offset =
case rawMatch r str offset opts of
Nothing → Nothing
Just [] → Nothing
Just ms → Just (ms, maximum $ map snd ms)
scan ∷ (ConvertibleStrings SBS a, ConvertibleStrings a SBS) ⇒ Regex → a → [(a, [a])]
scan r s = scanO r [] s
scanO ∷ (ConvertibleStrings SBS a, ConvertibleStrings a SBS) ⇒ Regex → [PCREExecOption] → a → [(a, [a])]
scanO r opts s = map behead $ map (cs . substr str) <$> unfoldr (nextMatch r opts str) 0
where str = cs s
scanRanges ∷ (ConvertibleStrings SBS a, ConvertibleStrings a SBS) ⇒ Regex → a → [((Int, Int), [(Int, Int)])]
scanRanges r s = scanRangesO r [] s
scanRangesO ∷ (ConvertibleStrings SBS a, ConvertibleStrings a SBS) ⇒ Regex → [PCREExecOption] → a → [((Int, Int), [(Int, Int)])]
scanRangesO r opts s = map behead $ unfoldr (nextMatch r opts str) 0
where str = cs s
class RegexReplacement a where
performReplacement ∷ SBS → [SBS] → a → SBS
instance ConvertibleStrings a SBS ⇒ RegexReplacement a where
performReplacement _ _ to = cs to
instance (ConvertibleStrings SBS a, ConvertibleStrings a SBS) ⇒ RegexReplacement (a → [a] → a) where
performReplacement from groups replacer = cs $ replacer (cs from) (map cs groups)
instance (ConvertibleStrings SBS a, ConvertibleStrings a SBS) ⇒ RegexReplacement (a → a) where
performReplacement from _ replacer = cs $ replacer (cs from)
instance (ConvertibleStrings SBS a, ConvertibleStrings a SBS) ⇒ RegexReplacement ([a] → a) where
performReplacement _ groups replacer = cs $ replacer (map cs groups)
rawSub ∷ RegexReplacement r ⇒ Regex → r → SBS → Int → [PCREExecOption] → Maybe (SBS, Int)
rawSub r t s offset opts =
case rawMatch r s offset opts of
Just ((begin, end):groups) →
let replacement = performReplacement (substr s (begin, end)) (map (substr s) groups) t in
Just (BS.concat [ substr s (0, begin)
, replacement
, substr s (end, BS.length s)], begin + BS.length replacement)
_ → Nothing
sub ∷ (ConvertibleStrings SBS a, ConvertibleStrings a SBS, RegexReplacement r) ⇒ Regex → r → a → a
sub r t s = subO r [] t s
subO ∷ (ConvertibleStrings SBS a, ConvertibleStrings a SBS, RegexReplacement r) ⇒ Regex → [PCREExecOption] → r → a → a
subO r opts t s = fromMaybe s $ cs <$> fst <$> rawSub r t (cs s) 0 opts
gsub ∷ (ConvertibleStrings SBS a, ConvertibleStrings a SBS, RegexReplacement r) ⇒ Regex → r → a → a
gsub r t s = gsubO r [] t s
gsubO ∷ (ConvertibleStrings SBS a, ConvertibleStrings a SBS, RegexReplacement r) ⇒ Regex → [PCREExecOption] → r → a → a
gsubO r opts t s = cs $ loop 0 str
where str = cs s
loop offset acc =
case rawSub r t acc offset opts of
Just (result, newOffset) →
if newOffset == offset then acc else loop newOffset result
_ → acc
split ∷ (ConvertibleStrings SBS a, ConvertibleStrings a SBS) ⇒ Regex → a → [a]
split r s = splitO r [] s
splitO ∷ (ConvertibleStrings SBS a, ConvertibleStrings a SBS) ⇒ Regex → [PCREExecOption] → a → [a]
splitO r opts s = map cs $ map' (substr str) partRanges
where map' f = foldr ((:) . f) [f (lastL, BS.length str)]
(lastL, partRanges) = mapAccumL invRange 0 ranges
invRange acc (xl, xr) = (xr, (acc, xl))
ranges = map fst $ scanRangesO r opts str
str = cs s
instance Lift PCREOption where
lift o = let o' = show o in [| read o' ∷ PCREOption |]
quoteExpRegex ∷ [PCREOption] → String → ExpQ
quoteExpRegex opts txt = [| PCRE.compile (cs (txt ∷ String)) opts |]
where !_ = PCRE.compile (cs txt) opts
mkRegexQQ ∷ [PCREOption] → QuasiQuoter
mkRegexQQ opts = QuasiQuoter
{ quoteExp = quoteExpRegex opts
, quotePat = undefined
, quoteType = undefined
, quoteDec = undefined }
re ∷ QuasiQuoter
re = mkRegexQQ [utf8]