\begin{code}
{-# LANGUAGE RecordWildCards            #-}
{-# LANGUAGE FlexibleContexts           #-}
{-# LANGUAGE FlexibleInstances          #-}
{-# LANGUAGE UndecidableInstances       #-}
{-# LANGUAGE MultiParamTypeClasses      #-}
{-# LANGUAGE DeriveDataTypeable         #-}
{-# LANGUAGE MonoLocalBinds             #-}
\end{code}

\begin{code}
module Text.RE.ZeInternals.Types.Match
  ( Match(..)
  , noMatch
  , emptyMatchArray
  , matched
  , matchedText
  , matchCapture
  , matchCaptures
  , (!$$)
  , captureText
  , (!$$?)
  , captureTextMaybe
  , (!$)
  , capture
  , (!$?)
  , captureMaybe
  , RegexFix(..)
  , convertMatchText
  ) where
\end{code}

\begin{code}
import           Data.Array
import           Data.Bits
import qualified Data.ByteString                as BW
import qualified Data.ByteString.Char8          as B
import qualified Data.ByteString.Lazy.Char8     as LBS
import qualified Data.ByteString.UTF8           as B
import           Data.Maybe
import qualified Data.Sequence                  as S
import qualified Data.Text                      as T
import qualified Data.Text.Encoding             as T
import qualified Data.Text.Lazy                 as LT
import           Data.Typeable
import           Data.Word
import           Text.RE.ZeInternals.Types.Capture
import           Text.RE.ZeInternals.Types.CaptureID
import           Text.Regex.Base
import qualified Text.Regex.PCRE                as PCRE
import qualified Text.Regex.TDFA                as TDFA

infixl 9 !$, !$$
\end{code}

\begin{code}
-- | the result of matching a RE to a text once (with @?=~@), retaining
-- the text that was matched against
data Match a =
  Match
    { Match a -> a
matchSource  :: !a                -- ^ the whole source text
    , Match a -> CaptureNames
captureNames :: !CaptureNames     -- ^ the RE's capture names
    , Match a -> Array CaptureOrdinal (Capture a)
matchArray   :: !(Array CaptureOrdinal (Capture a))
                                        -- ^ 0..n-1 captures,
                                        -- starting with the
                                        -- text matched by the
                                        -- whole RE
    }
  deriving (Int -> Match a -> ShowS
[Match a] -> ShowS
Match a -> String
(Int -> Match a -> ShowS)
-> (Match a -> String) -> ([Match a] -> ShowS) -> Show (Match a)
forall a. Show a => Int -> Match a -> ShowS
forall a. Show a => [Match a] -> ShowS
forall a. Show a => Match a -> String
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [Match a] -> ShowS
$cshowList :: forall a. Show a => [Match a] -> ShowS
show :: Match a -> String
$cshow :: forall a. Show a => Match a -> String
showsPrec :: Int -> Match a -> ShowS
$cshowsPrec :: forall a. Show a => Int -> Match a -> ShowS
Show,Match a -> Match a -> Bool
(Match a -> Match a -> Bool)
-> (Match a -> Match a -> Bool) -> Eq (Match a)
forall a. Eq a => Match a -> Match a -> Bool
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: Match a -> Match a -> Bool
$c/= :: forall a. Eq a => Match a -> Match a -> Bool
== :: Match a -> Match a -> Bool
$c== :: forall a. Eq a => Match a -> Match a -> Bool
Eq,Typeable)
\end{code}

\begin{code}
-- | Construct a Match that does not match anything.
noMatch :: a -> Match a
noMatch :: a -> Match a
noMatch a
t = a -> CaptureNames -> Array CaptureOrdinal (Capture a) -> Match a
forall a.
a -> CaptureNames -> Array CaptureOrdinal (Capture a) -> Match a
Match a
t CaptureNames
noCaptureNames Array CaptureOrdinal (Capture a)
forall a. Array CaptureOrdinal (Capture a)
emptyMatchArray

-- | an empty array of Capture
emptyMatchArray :: Array CaptureOrdinal (Capture a)
emptyMatchArray :: Array CaptureOrdinal (Capture a)
emptyMatchArray = (CaptureOrdinal, CaptureOrdinal)
-> [Capture a] -> Array CaptureOrdinal (Capture a)
forall i e. Ix i => (i, i) -> [e] -> Array i e
listArray (Int -> CaptureOrdinal
CaptureOrdinal Int
0,Int -> CaptureOrdinal
CaptureOrdinal (Int -> CaptureOrdinal) -> Int -> CaptureOrdinal
forall a b. (a -> b) -> a -> b
$ -Int
1) []
\end{code}

\begin{code}
instance Functor Match where
  fmap :: (a -> b) -> Match a -> Match b
fmap a -> b
f Match{a
Array CaptureOrdinal (Capture a)
CaptureNames
matchArray :: Array CaptureOrdinal (Capture a)
captureNames :: CaptureNames
matchSource :: a
matchArray :: forall a. Match a -> Array CaptureOrdinal (Capture a)
captureNames :: forall a. Match a -> CaptureNames
matchSource :: forall a. Match a -> a
..} =
    Match :: forall a.
a -> CaptureNames -> Array CaptureOrdinal (Capture a) -> Match a
Match
      { matchSource :: b
matchSource  = a -> b
f a
matchSource
      , captureNames :: CaptureNames
captureNames = CaptureNames
captureNames
      , matchArray :: Array CaptureOrdinal (Capture b)
matchArray   = (Capture a -> Capture b)
-> Array CaptureOrdinal (Capture a)
-> Array CaptureOrdinal (Capture b)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap ((a -> b) -> Capture a -> Capture b
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap a -> b
f) Array CaptureOrdinal (Capture a)
matchArray
      }
\end{code}

\begin{code}
-- | tests whether the RE matched the source text at all
matched :: Match a -> Bool
matched :: Match a -> Bool
matched = Maybe (Capture a) -> Bool
forall a. Maybe a -> Bool
isJust (Maybe (Capture a) -> Bool)
-> (Match a -> Maybe (Capture a)) -> Match a -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Match a -> Maybe (Capture a)
forall a. Match a -> Maybe (Capture a)
matchCapture

-- | yields the text matched by the RE, Nothing if no match
matchedText :: Match a -> Maybe a
matchedText :: Match a -> Maybe a
matchedText = (Capture a -> a) -> Maybe (Capture a) -> Maybe a
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap Capture a -> a
forall a. Capture a -> a
capturedText (Maybe (Capture a) -> Maybe a)
-> (Match a -> Maybe (Capture a)) -> Match a -> Maybe a
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Match a -> Maybe (Capture a)
forall a. Match a -> Maybe (Capture a)
matchCapture

-- | the top-level capture if the source text matched the RE,
-- Nothing otherwise
matchCapture :: Match a -> Maybe (Capture a)
matchCapture :: Match a -> Maybe (Capture a)
matchCapture = ((Capture a, [Capture a]) -> Capture a)
-> Maybe (Capture a, [Capture a]) -> Maybe (Capture a)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap (Capture a, [Capture a]) -> Capture a
forall a b. (a, b) -> a
fst (Maybe (Capture a, [Capture a]) -> Maybe (Capture a))
-> (Match a -> Maybe (Capture a, [Capture a]))
-> Match a
-> Maybe (Capture a)
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Match a -> Maybe (Capture a, [Capture a])
forall a. Match a -> Maybe (Capture a, [Capture a])
matchCaptures

-- | the main top-level capture (capture \'0'') and the sub captures
-- if the text matched the RE, @Nothing@ otherwise
matchCaptures :: Match a -> Maybe (Capture a,[Capture a])
matchCaptures :: Match a -> Maybe (Capture a, [Capture a])
matchCaptures Match{a
Array CaptureOrdinal (Capture a)
CaptureNames
matchArray :: Array CaptureOrdinal (Capture a)
captureNames :: CaptureNames
matchSource :: a
matchArray :: forall a. Match a -> Array CaptureOrdinal (Capture a)
captureNames :: forall a. Match a -> CaptureNames
matchSource :: forall a. Match a -> a
..} = case (CaptureOrdinal, CaptureOrdinal) -> Int
forall a. Ix a => (a, a) -> Int
rangeSize (Array CaptureOrdinal (Capture a)
-> (CaptureOrdinal, CaptureOrdinal)
forall i e. Array i e -> (i, i)
bounds Array CaptureOrdinal (Capture a)
matchArray) Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
0 of
  Bool
True  -> Maybe (Capture a, [Capture a])
forall a. Maybe a
Nothing
  Bool
False -> (Capture a, [Capture a]) -> Maybe (Capture a, [Capture a])
forall a. a -> Maybe a
Just (Array CaptureOrdinal (Capture a)
matchArrayArray CaptureOrdinal (Capture a) -> CaptureOrdinal -> Capture a
forall i e. Ix i => Array i e -> i -> e
!CaptureOrdinal
0,Int -> [Capture a] -> [Capture a]
forall a. Int -> [a] -> [a]
drop Int
1 ([Capture a] -> [Capture a]) -> [Capture a] -> [Capture a]
forall a b. (a -> b) -> a -> b
$ Array CaptureOrdinal (Capture a) -> [Capture a]
forall i e. Array i e -> [e]
elems Array CaptureOrdinal (Capture a)
matchArray)

-- | an alternative for captureText
(!$$) :: Match a -> CaptureID -> a
!$$ :: Match a -> CaptureID -> a
(!$$) = (CaptureID -> Match a -> a) -> Match a -> CaptureID -> a
forall a b c. (a -> b -> c) -> b -> a -> c
flip CaptureID -> Match a -> a
forall a. CaptureID -> Match a -> a
captureText

-- | look up the text of the nth capture, 0 being the match of the whole
-- RE against the source text, 1, the first bracketed sub-expression to
-- be matched and so on
captureText :: CaptureID -> Match a -> a
captureText :: CaptureID -> Match a -> a
captureText CaptureID
cid Match a
mtch = Capture a -> a
forall a. Capture a -> a
capturedText (Capture a -> a) -> Capture a -> a
forall a b. (a -> b) -> a -> b
$ CaptureID -> Match a -> Capture a
forall a. CaptureID -> Match a -> Capture a
capture CaptureID
cid Match a
mtch

-- | an alternative for captureTextMaybe
(!$$?) :: Match a -> CaptureID -> Maybe a
!$$? :: Match a -> CaptureID -> Maybe a
(!$$?) = (CaptureID -> Match a -> Maybe a)
-> Match a -> CaptureID -> Maybe a
forall a b c. (a -> b -> c) -> b -> a -> c
flip CaptureID -> Match a -> Maybe a
forall a. CaptureID -> Match a -> Maybe a
captureTextMaybe

-- | look up the text of the nth capture (0 being the match of the
-- whole), returning Nothing if the Match doesn't contain the capture
captureTextMaybe :: CaptureID -> Match a -> Maybe a
captureTextMaybe :: CaptureID -> Match a -> Maybe a
captureTextMaybe CaptureID
cid Match a
mtch = do
    Capture a
cap <- Match a
mtch Match a -> CaptureID -> Maybe (Capture a)
forall a. Match a -> CaptureID -> Maybe (Capture a)
!$? CaptureID
cid
    case Capture a -> Bool
forall a. Capture a -> Bool
hasCaptured Capture a
cap of
      Bool
True  -> a -> Maybe a
forall a. a -> Maybe a
Just (a -> Maybe a) -> a -> Maybe a
forall a b. (a -> b) -> a -> b
$ Capture a -> a
forall a. Capture a -> a
capturedText Capture a
cap
      Bool
False -> Maybe a
forall a. Maybe a
Nothing

-- | an alternative for capture
(!$) :: Match a -> CaptureID -> Capture a
!$ :: Match a -> CaptureID -> Capture a
(!$) = (CaptureID -> Match a -> Capture a)
-> Match a -> CaptureID -> Capture a
forall a b c. (a -> b -> c) -> b -> a -> c
flip CaptureID -> Match a -> Capture a
forall a. CaptureID -> Match a -> Capture a
capture

-- | look up the nth capture, 0 being the match of the whole RE against
-- the source text, 1, the first bracketed sub-expression to be matched
-- and so on
capture :: CaptureID -> Match a -> Capture a
capture :: CaptureID -> Match a -> Capture a
capture CaptureID
cid Match a
mtch = Capture a -> Maybe (Capture a) -> Capture a
forall a. a -> Maybe a -> a
fromMaybe Capture a
oops (Maybe (Capture a) -> Capture a) -> Maybe (Capture a) -> Capture a
forall a b. (a -> b) -> a -> b
$ Match a
mtch Match a -> CaptureID -> Maybe (Capture a)
forall a. Match a -> CaptureID -> Maybe (Capture a)
!$? CaptureID
cid
  where
    oops :: Capture a
oops = String -> Capture a
forall a. HasCallStack => String -> a
error (String -> Capture a) -> String -> Capture a
forall a b. (a -> b) -> a -> b
$ String
"capture: out of bounds (" String -> ShowS
forall a. [a] -> [a] -> [a]
++ CaptureID -> String
forall a. Show a => a -> String
show CaptureID
cid String -> ShowS
forall a. [a] -> [a] -> [a]
++ String
")"

-- | an alternative for capture captureMaybe
(!$?) :: Match a -> CaptureID -> Maybe (Capture a)
!$? :: Match a -> CaptureID -> Maybe (Capture a)
(!$?) = (CaptureID -> Match a -> Maybe (Capture a))
-> Match a -> CaptureID -> Maybe (Capture a)
forall a b c. (a -> b -> c) -> b -> a -> c
flip CaptureID -> Match a -> Maybe (Capture a)
forall a. CaptureID -> Match a -> Maybe (Capture a)
captureMaybe

-- | look up the nth capture, 0 being the match of the whole RE against
-- the source text, 1, the first bracketed sub-expression to be matched
-- and so on, returning Nothing if there is no such capture, or if the
-- capture failed to capture anything (being in a failed alternate)
captureMaybe :: CaptureID -> Match a -> Maybe (Capture a)
captureMaybe :: CaptureID -> Match a -> Maybe (Capture a)
captureMaybe CaptureID
cid mtch :: Match a
mtch@Match{a
Array CaptureOrdinal (Capture a)
CaptureNames
matchArray :: Array CaptureOrdinal (Capture a)
captureNames :: CaptureNames
matchSource :: a
matchArray :: forall a. Match a -> Array CaptureOrdinal (Capture a)
captureNames :: forall a. Match a -> CaptureNames
matchSource :: forall a. Match a -> a
..} = do
  CaptureOrdinal
i   <- CaptureID -> Match a -> Maybe CaptureOrdinal
forall a. CaptureID -> Match a -> Maybe CaptureOrdinal
lookupCaptureID CaptureID
cid Match a
mtch
  Capture a
cap <- case Array CaptureOrdinal (Capture a)
-> (CaptureOrdinal, CaptureOrdinal)
forall i e. Array i e -> (i, i)
bounds Array CaptureOrdinal (Capture a)
matchArray (CaptureOrdinal, CaptureOrdinal) -> CaptureOrdinal -> Bool
forall a. Ix a => (a, a) -> a -> Bool
`inRange` CaptureOrdinal
i of
    Bool
True  -> Capture a -> Maybe (Capture a)
forall a. a -> Maybe a
Just (Capture a -> Maybe (Capture a)) -> Capture a -> Maybe (Capture a)
forall a b. (a -> b) -> a -> b
$ Array CaptureOrdinal (Capture a)
matchArray Array CaptureOrdinal (Capture a) -> CaptureOrdinal -> Capture a
forall i e. Ix i => Array i e -> i -> e
! CaptureOrdinal
i
    Bool
False -> Maybe (Capture a)
forall a. Maybe a
Nothing
  case Capture a -> Bool
forall a. Capture a -> Bool
hasCaptured Capture a
cap of
    Bool
True  -> Capture a -> Maybe (Capture a)
forall a. a -> Maybe a
Just Capture a
cap
    Bool
False -> Maybe (Capture a)
forall a. Maybe a
Nothing

lookupCaptureID :: CaptureID -> Match a -> Maybe CaptureOrdinal
lookupCaptureID :: CaptureID -> Match a -> Maybe CaptureOrdinal
lookupCaptureID CaptureID
cid Match{a
Array CaptureOrdinal (Capture a)
CaptureNames
matchArray :: Array CaptureOrdinal (Capture a)
captureNames :: CaptureNames
matchSource :: a
matchArray :: forall a. Match a -> Array CaptureOrdinal (Capture a)
captureNames :: forall a. Match a -> CaptureNames
matchSource :: forall a. Match a -> a
..} =
    (String -> Maybe CaptureOrdinal)
-> (CaptureOrdinal -> Maybe CaptureOrdinal)
-> Either String CaptureOrdinal
-> Maybe CaptureOrdinal
forall a c b. (a -> c) -> (b -> c) -> Either a b -> c
either (Maybe CaptureOrdinal -> String -> Maybe CaptureOrdinal
forall a b. a -> b -> a
const Maybe CaptureOrdinal
forall a. Maybe a
Nothing) CaptureOrdinal -> Maybe CaptureOrdinal
forall a. a -> Maybe a
Just (Either String CaptureOrdinal -> Maybe CaptureOrdinal)
-> Either String CaptureOrdinal -> Maybe CaptureOrdinal
forall a b. (a -> b) -> a -> b
$ CaptureID -> CaptureNames -> Either String CaptureOrdinal
findCaptureID CaptureID
cid CaptureNames
captureNames
\end{code}


\begin{code}
-- | this instance hooks 'Match' into regex-base: regex consumers need
-- not worry about any of this
instance
    ( RegexContext regex source (AllTextSubmatches (Array Int) (source,(Int,Int)))
    , RegexLike    regex source
    , RegexFix     regex source
    ) =>
  RegexContext regex source (Match source) where
    match :: regex -> source -> Match source
match  regex
r source
s = regex -> source -> MatchText source -> Match source
forall regex source.
RegexFix regex source =>
regex -> source -> MatchText source -> Match source
convertMatchText regex
r source
s (MatchText source -> Match source)
-> MatchText source -> Match source
forall a b. (a -> b) -> a -> b
$ AllTextSubmatches (Array Int) (source, (Int, Int))
-> MatchText source
forall (f :: * -> *) b. AllTextSubmatches f b -> f b
getAllTextSubmatches (AllTextSubmatches (Array Int) (source, (Int, Int))
 -> MatchText source)
-> AllTextSubmatches (Array Int) (source, (Int, Int))
-> MatchText source
forall a b. (a -> b) -> a -> b
$ regex
-> source -> AllTextSubmatches (Array Int) (source, (Int, Int))
forall regex source target.
RegexContext regex source target =>
regex -> source -> target
match regex
r source
s
    matchM :: regex -> source -> m (Match source)
matchM regex
r source
s = do
      AllTextSubmatches (Array Int) (source, (Int, Int))
y <- regex
-> source -> m (AllTextSubmatches (Array Int) (source, (Int, Int)))
forall regex source target (m :: * -> *).
(RegexContext regex source target, MonadFail m) =>
regex -> source -> m target
matchM regex
r source
s
      Match source -> m (Match source)
forall (m :: * -> *) a. Monad m => a -> m a
return (Match source -> m (Match source))
-> Match source -> m (Match source)
forall a b. (a -> b) -> a -> b
$ regex -> source -> MatchText source -> Match source
forall regex source.
RegexFix regex source =>
regex -> source -> MatchText source -> Match source
convertMatchText regex
r source
s (MatchText source -> Match source)
-> MatchText source -> Match source
forall a b. (a -> b) -> a -> b
$ AllTextSubmatches (Array Int) (source, (Int, Int))
-> MatchText source
forall (f :: * -> *) b. AllTextSubmatches f b -> f b
getAllTextSubmatches AllTextSubmatches (Array Int) (source, (Int, Int))
y
\end{code}

\begin{code}
-- | convert a regex-base native MatchText into a regex Match type
convertMatchText :: RegexFix regex source
                 => regex
                 -> source
                 -> MatchText source
                 -> Match source
convertMatchText :: regex -> source -> MatchText source -> Match source
convertMatchText regex
re source
hay MatchText source
arr =
    Match :: forall a.
a -> CaptureNames -> Array CaptureOrdinal (Capture a) -> Match a
Match
      { matchSource :: source
matchSource  = source
hay
      , captureNames :: CaptureNames
captureNames = CaptureNames
noCaptureNames
      , matchArray :: Array CaptureOrdinal (Capture source)
matchArray   =
          (CaptureOrdinal, CaptureOrdinal)
-> (CaptureOrdinal -> Int)
-> Array Int (Capture source)
-> Array CaptureOrdinal (Capture source)
forall i j e.
(Ix i, Ix j) =>
(i, i) -> (i -> j) -> Array j e -> Array i e
ixmap (Int -> CaptureOrdinal
CaptureOrdinal Int
lo,Int -> CaptureOrdinal
CaptureOrdinal Int
hi) CaptureOrdinal -> Int
getCaptureOrdinal (Array Int (Capture source)
 -> Array CaptureOrdinal (Capture source))
-> Array Int (Capture source)
-> Array CaptureOrdinal (Capture source)
forall a b. (a -> b) -> a -> b
$
            ((source, (Int, Int)) -> Capture source)
-> MatchText source -> Array Int (Capture source)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap (source, (Int, Int)) -> Capture source
f MatchText source
arr
      }
  where
    (Int
lo,Int
hi) = MatchText source -> (Int, Int)
forall i e. Array i e -> (i, i)
bounds MatchText source
arr

    f :: (source, (Int, Int)) -> Capture source
f (source
ndl,(Int
off_,Int
len_)) =
      Capture :: forall a. a -> a -> Int -> Int -> Capture a
Capture
        { captureSource :: source
captureSource = source
hay
        , capturedText :: source
capturedText  = source
ndl
        , captureOffset :: Int
captureOffset = Int
off
        , captureLength :: Int
captureLength = Int
len
        }
      where
        CharRange Int
off Int
len = regex -> source -> Int -> Int -> CharRange
forall regex source.
RegexFix regex source =>
regex -> source -> Int -> Int -> CharRange
utf8_correct regex
re source
hay Int
off_ Int
len_
\end{code}

\begin{code}
data CharRange = CharRange !Int !Int
  deriving (Int -> CharRange -> ShowS
[CharRange] -> ShowS
CharRange -> String
(Int -> CharRange -> ShowS)
-> (CharRange -> String)
-> ([CharRange] -> ShowS)
-> Show CharRange
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [CharRange] -> ShowS
$cshowList :: [CharRange] -> ShowS
show :: CharRange -> String
$cshow :: CharRange -> String
showsPrec :: Int -> CharRange -> ShowS
$cshowsPrec :: Int -> CharRange -> ShowS
Show)

class RegexFix regex source where
  utf8_correct :: regex -> source -> Int -> Int -> CharRange
  utf8_correct regex
_ source
_ = Int -> Int -> CharRange
CharRange

instance RegexFix TDFA.Regex [Char]         where
instance RegexFix TDFA.Regex B.ByteString   where
instance RegexFix TDFA.Regex LBS.ByteString where
instance RegexFix TDFA.Regex T.Text         where
instance RegexFix TDFA.Regex LT.Text        where
instance RegexFix TDFA.Regex (S.Seq Char)   where

instance RegexFix PCRE.Regex [Char]         where
  utf8_correct :: Regex -> String -> Int -> Int -> CharRange
utf8_correct Regex
_ = ByteString -> Int -> Int -> CharRange
utf8_correct_bs (ByteString -> Int -> Int -> CharRange)
-> (String -> ByteString) -> String -> Int -> Int -> CharRange
forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> ByteString
B.fromString
instance RegexFix PCRE.Regex B.ByteString   where
instance RegexFix PCRE.Regex LBS.ByteString where
instance RegexFix PCRE.Regex T.Text         where
  utf8_correct :: Regex -> Text -> Int -> Int -> CharRange
utf8_correct Regex
_ = ByteString -> Int -> Int -> CharRange
utf8_correct_bs (ByteString -> Int -> Int -> CharRange)
-> (Text -> ByteString) -> Text -> Int -> Int -> CharRange
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> ByteString
T.encodeUtf8
instance RegexFix PCRE.Regex LT.Text        where
  utf8_correct :: Regex -> Text -> Int -> Int -> CharRange
utf8_correct Regex
_ = ByteString -> Int -> Int -> CharRange
utf8_correct_bs (ByteString -> Int -> Int -> CharRange)
-> (Text -> ByteString) -> Text -> Int -> Int -> CharRange
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> ByteString
T.encodeUtf8 (Text -> ByteString) -> (Text -> Text) -> Text -> ByteString
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> Text
LT.toStrict
instance RegexFix PCRE.Regex (S.Seq Char)   where

-- convert a byte offset+length in a UTF-8-encoded ByteString
-- into a character offset+length
utf8_correct_bs :: B.ByteString -> Int -> Int -> CharRange
utf8_correct_bs :: ByteString -> Int -> Int -> CharRange
utf8_correct_bs ByteString
bs Int
ix0 Int
ln0 = case Int
ix0Int -> Int -> Int
forall a. Num a => a -> a -> a
+Int
ln0 Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
> ByteString -> Int
BW.length ByteString
bs of
    Bool
True  -> String -> CharRange
forall a. HasCallStack => String -> a
error String
"utf8_correct_bs: index+length out of range"
    Bool
False -> Int -> Int -> CharRange
skip Int
0 Int
0     -- BW.index calls below should not fail
  where
    skip :: Int -> Int -> CharRange
skip Int
ix Int
di = case Int -> Int -> Ordering
forall a. Ord a => a -> a -> Ordering
compare Int
ix Int
ix0 of
      Ordering
GT -> case Int
ix0 of
        -- -1 is used as a magic number to indicate failure to match
        -1 -> Int -> Int -> CharRange
CharRange Int
ix0 Int
ln0
        Int
_ -> String -> CharRange
forall a. HasCallStack => String -> a
error String
"utf8_correct_bs: UTF-8 decoding error"
      Ordering
EQ -> Int -> Int -> Int -> Int -> CharRange
count Int
ix Int
di Int
0 Int
ln0
      Ordering
LT -> case Word8 -> UTF8Size
u8_width (Word8 -> UTF8Size) -> Word8 -> UTF8Size
forall a b. (a -> b) -> a -> b
$ ByteString -> Int -> Word8
BW.index ByteString
bs Int
ix of
        UTF8Size
Single    -> Int -> Int -> CharRange
skip (Int
ixInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
1)   Int
di
        UTF8Size
Double    -> Int -> Int -> CharRange
skip (Int
ixInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
2) (Int -> CharRange) -> Int -> CharRange
forall a b. (a -> b) -> a -> b
$ Int
diInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
1
        UTF8Size
Triple    -> Int -> Int -> CharRange
skip (Int
ixInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
3) (Int -> CharRange) -> Int -> CharRange
forall a b. (a -> b) -> a -> b
$ Int
diInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
2
        UTF8Size
Quadruple -> Int -> Int -> CharRange
skip (Int
ixInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
4) (Int -> CharRange) -> Int -> CharRange
forall a b. (a -> b) -> a -> b
$ Int
diInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
3

    count :: Int -> Int -> Int -> Int -> CharRange
count Int
ix Int
di Int
dl Int
c = case Int -> Int -> Ordering
forall a. Ord a => a -> a -> Ordering
compare Int
c Int
0 of
      Ordering
LT -> String -> CharRange
forall a. HasCallStack => String -> a
error String
"utf8_correct_bs: length ends inside character"
      Ordering
EQ -> Int -> Int -> CharRange
CharRange (Int
ix0Int -> Int -> Int
forall a. Num a => a -> a -> a
-Int
di) (Int
ln0Int -> Int -> Int
forall a. Num a => a -> a -> a
-Int
dl)
      Ordering
GT -> case Word8 -> UTF8Size
u8_width (Word8 -> UTF8Size) -> Word8 -> UTF8Size
forall a b. (a -> b) -> a -> b
$ ByteString -> Int -> Word8
BW.index ByteString
bs Int
ix of
        UTF8Size
Single    -> Int -> Int -> Int -> Int -> CharRange
count (Int
ixInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
1) Int
di  Int
dl    (Int -> CharRange) -> Int -> CharRange
forall a b. (a -> b) -> a -> b
$ Int
cInt -> Int -> Int
forall a. Num a => a -> a -> a
-Int
1
        UTF8Size
Double    -> Int -> Int -> Int -> Int -> CharRange
count (Int
ixInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
2) Int
di (Int
dlInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
1) (Int -> CharRange) -> Int -> CharRange
forall a b. (a -> b) -> a -> b
$ Int
cInt -> Int -> Int
forall a. Num a => a -> a -> a
-Int
2
        UTF8Size
Triple    -> Int -> Int -> Int -> Int -> CharRange
count (Int
ixInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
3) Int
di (Int
dlInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
2) (Int -> CharRange) -> Int -> CharRange
forall a b. (a -> b) -> a -> b
$ Int
cInt -> Int -> Int
forall a. Num a => a -> a -> a
-Int
3
        UTF8Size
Quadruple -> Int -> Int -> Int -> Int -> CharRange
count (Int
ixInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
4) Int
di (Int
dlInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
3) (Int -> CharRange) -> Int -> CharRange
forall a b. (a -> b) -> a -> b
$ Int
cInt -> Int -> Int
forall a. Num a => a -> a -> a
-Int
4

data UTF8Size = Single | Double | Triple | Quadruple
  deriving (Int -> UTF8Size -> ShowS
[UTF8Size] -> ShowS
UTF8Size -> String
(Int -> UTF8Size -> ShowS)
-> (UTF8Size -> String) -> ([UTF8Size] -> ShowS) -> Show UTF8Size
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [UTF8Size] -> ShowS
$cshowList :: [UTF8Size] -> ShowS
show :: UTF8Size -> String
$cshow :: UTF8Size -> String
showsPrec :: Int -> UTF8Size -> ShowS
$cshowsPrec :: Int -> UTF8Size -> ShowS
Show)

u8_width :: Word8 -> UTF8Size
u8_width :: Word8 -> UTF8Size
u8_width Word8
w8 = case   Word8
w8 Word8 -> Word8 -> Word8
forall a. Bits a => a -> a -> a
.&. Word8
0x80 Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
== Word8
0x00 of
  Bool
True  ->       UTF8Size
Single
  Bool
False -> case      Word8
w8 Word8 -> Word8 -> Word8
forall a. Bits a => a -> a -> a
.&. Word8
0xE0 Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
== Word8
0xC0 of
    Bool
True  ->     UTF8Size
Double
    Bool
False -> case    Word8
w8 Word8 -> Word8 -> Word8
forall a. Bits a => a -> a -> a
.&. Word8
0xF0 Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
== Word8
0xE0 of
      Bool
True  ->   UTF8Size
Triple
      Bool
False -> case  Word8
w8 Word8 -> Word8 -> Word8
forall a. Bits a => a -> a -> a
.&. Word8
0xF8 Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
== Word8
0xF0 of
        Bool
True  -> UTF8Size
Quadruple
        Bool
False -> String -> UTF8Size
forall a. HasCallStack => String -> a
error String
"u8_width: UTF-8 decoding error"
\end{code}