module Text.Regex.TDFA.UTF8(Utf8(Utf8,utf8)) where
import Data.Array.IArray((!))
import Data.Maybe(listToMaybe)
import qualified Data.ByteString.Lazy.Char8 as L(ByteString,empty)
import qualified Data.ByteString.Lazy.UTF8 as U(take,drop,uncons,toString)
import Text.Regex.Base(RegexLike(..),RegexMaker(..),Extract(..),MatchArray,RegexContext(..))
import Text.Regex.Base.Impl(polymatch,polymatchM)
import Text.Regex.TDFA.String()
import Text.Regex.TDFA.Common(Regex(..),CompOption,ExecOption(captureGroups),Position)
import Text.Regex.TDFA.NewDFA.Uncons(Uncons(uncons))
import qualified Text.Regex.TDFA.NewDFA.Engine as Engine(execMatch)
import qualified Text.Regex.TDFA.NewDFA.Tester as Tester(matchTest)
newtype Utf8 = Utf8 { utf8 :: L.ByteString }
deriving (Show,Read,Eq,Ord)
instance Extract Utf8 where
empty = Utf8 L.empty
before i = Utf8 . U.take (fromIntegral i) . utf8
after i = Utf8 . U.drop (fromIntegral i) . utf8
instance Uncons Utf8 where
uncons = fmap (fmap Utf8) . U.uncons . utf8
instance RegexMaker Regex CompOption ExecOption Utf8 where
makeRegexOptsM c e source = makeRegexOptsM c e (U.toString (utf8 source))
execMatch :: Uncons text => Regex -> Position -> Char -> text -> [MatchArray]
execMatch = Engine.execMatch
myMatchTest :: Uncons text => Regex -> text -> Bool
myMatchTest = Tester.matchTest
instance RegexLike Regex Utf8 where
matchOnce r s = listToMaybe (matchAll r s)
matchAll r s = execMatch r 0 '\n' s
matchCount r s = length (matchAll r' s)
where r' = r { regex_execOptions = (regex_execOptions r) {captureGroups = False} }
matchTest = myMatchTest
matchOnceText regex source =
fmap (\ ma ->
let (o,l) = ma!0
in (before o source
,fmap (\ ol -> (extract ol source,ol)) ma
,after (o+l) source))
(matchOnce regex source)
matchAllText regex source =
let go i _ _ | i `seq` False = undefined
go _i _t [] = []
go i t (x:xs) =
let (off0,len0) = x!0
trans pair@(off,len) = (extract (offi,len) t,pair)
t' = after (off0+(len0i)) t
in fmap trans x : seq t' (go (off0+len0) t' xs)
in go 0 source (matchAll regex source)
instance RegexContext Regex Utf8 Utf8 where
match = polymatch
matchM = polymatchM