module Text.Regex.TDFA.UTF8(Utf8(Utf8,utf8)) where

import Data.Array.IArray((!))
import Data.Maybe(listToMaybe)
import qualified Data.ByteString.Lazy.Char8 as L(ByteString,empty)

import qualified Data.ByteString.Lazy.UTF8 as U(take,drop,uncons,toString)

import Text.Regex.Base(RegexLike(..),RegexMaker(..),Extract(..),MatchArray,RegexContext(..))
import Text.Regex.Base.Impl(polymatch,polymatchM)

import Text.Regex.TDFA.String() -- instances only
import Text.Regex.TDFA.Common(Regex(..),CompOption,ExecOption(captureGroups),Position)
import Text.Regex.TDFA.NewDFA.Uncons(Uncons(uncons))
import qualified Text.Regex.TDFA.NewDFA.Engine as Engine(execMatch)
import qualified Text.Regex.TDFA.NewDFA.Tester as Tester(matchTest)

-- This is a newtype for the instances we are making.
-- You will likely want to use a pre-existing newtype from your code.
newtype Utf8 = Utf8 { utf8 :: L.ByteString }
  deriving (Show,Read,Eq,Ord)

instance Extract Utf8 where
  {-# INLINE empty #-}
  empty = Utf8 L.empty
  {-# INLINE before #-}
  before i = Utf8 . U.take (fromIntegral i) . utf8
  {-# INLINE after #-}
  after i = Utf8 . U.drop (fromIntegral i) . utf8

instance Uncons Utf8 where
  {-# INLINE uncons #-}
  uncons = fmap (fmap Utf8) . U.uncons . utf8

instance RegexMaker Regex CompOption ExecOption Utf8 where
  makeRegexOptsM c e source = makeRegexOptsM c e (U.toString (utf8 source))

{-# SPECIALIZE execMatch :: Regex -> Position -> Char -> Utf8 -> [MatchArray] #-}
execMatch :: Uncons text => Regex -> Position -> Char -> text -> [MatchArray]
execMatch = Engine.execMatch

{-# SPECIALIZE myMatchTest :: Regex -> Utf8 -> Bool #-}
myMatchTest :: Uncons text => Regex -> text -> Bool
myMatchTest = Tester.matchTest

instance RegexLike Regex Utf8 where
  matchOnce r s = listToMaybe (matchAll r s)
  matchAll r s = execMatch r 0 '\n' s
  matchCount r s = length (matchAll r' s)
    where r' = r { regex_execOptions = (regex_execOptions r) {captureGroups = False} }
  matchTest = myMatchTest
  matchOnceText regex source =
    fmap (\ ma ->
            let (o,l) = ma!0
            in (before o source
               ,fmap (\ ol -> (extract ol source,ol)) ma
               ,after (o+l) source))
         (matchOnce regex source)
  matchAllText regex source =
    let go i _ _ | i `seq` False = undefined
        go _i _t [] = []
        go i t (x:xs) =
          let (off0,len0) = x!0
              trans pair@(off,len) = (extract (off-i,len) t,pair)
              t' = after (off0+(len0-i)) t
          in fmap trans x : seq t' (go (off0+len0) t' xs)
    in go 0 source (matchAll regex source)

instance RegexContext Regex Utf8 Utf8 where
  match = polymatch
  matchM = polymatchM