module Text.Tokenizer.Split (
TokenizeMap (..), singleTokMap, insert, makeTokenizeMap,
TokenizeError (..), tokenize
) where
import Data.Map (Map)
import qualified Data.Map as M
import Data.IntMap (IntMap)
import qualified Data.IntMap as IM
import qualified Data.Set as S
import Data.Bifunctor (Bifunctor(..))
import Control.Monad.Trans.State (State, evalState, gets)
import Data.Maybe (fromMaybe)
import Data.Foldable (foldrM)
import Control.Monad (guard)
import Data.Coerce (coerce)
import qualified Text.Tokenizer.BlackWhiteSet as BWS
import Control.Applicative (Alternative(..))
import Text.Tokenizer.Types
(Token(..), Count(..), Repeatable(..), RToken(..), TokId, Alt(..), makeRToken)
import Text.Tokenizer.BlackWhiteSet (BlackWhiteSet(..))
modifyId :: (TokId -> TokId) -> RToken c -> RToken c
modifyId :: (TokId -> TokId) -> RToken c -> RToken c
modifyId TokId -> TokId
f tok :: RToken c
tok@RToken {TokId
$sel:tokId:RToken :: forall c. RToken c -> TokId
tokId :: TokId
tokId} = RToken c
tok {$sel:tokId:RToken :: TokId
tokId = TokId -> TokId
f TokId
tokId}
data TokenizeMap k c = TokenizeMap {
TokenizeMap k c -> TokId
tokCount :: Int,
TokenizeMap k c -> Map c [RToken c]
charTokMap :: Map c [RToken c],
TokenizeMap k c -> [RToken c]
blackToks :: [RToken c],
TokenizeMap k c -> IntMap k
tokNames :: IntMap k
} deriving (TokId -> TokenizeMap k c -> ShowS
[TokenizeMap k c] -> ShowS
TokenizeMap k c -> String
(TokId -> TokenizeMap k c -> ShowS)
-> (TokenizeMap k c -> String)
-> ([TokenizeMap k c] -> ShowS)
-> Show (TokenizeMap k c)
forall a.
(TokId -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
forall k c. (Show c, Show k) => TokId -> TokenizeMap k c -> ShowS
forall k c. (Show c, Show k) => [TokenizeMap k c] -> ShowS
forall k c. (Show c, Show k) => TokenizeMap k c -> String
showList :: [TokenizeMap k c] -> ShowS
$cshowList :: forall k c. (Show c, Show k) => [TokenizeMap k c] -> ShowS
show :: TokenizeMap k c -> String
$cshow :: forall k c. (Show c, Show k) => TokenizeMap k c -> String
showsPrec :: TokId -> TokenizeMap k c -> ShowS
$cshowsPrec :: forall k c. (Show c, Show k) => TokId -> TokenizeMap k c -> ShowS
Show)
instance Ord c => Semigroup (TokenizeMap k c) where
TokenizeMap TokId
tokCount' Map c [RToken c]
tokMap' [RToken c]
blackToks' IntMap k
tokNames'
<> :: TokenizeMap k c -> TokenizeMap k c -> TokenizeMap k c
<> TokenizeMap TokId
tokCount'' Map c [RToken c]
tokMap'' [RToken c]
blackToks'' IntMap k
tokNames'' =
TokenizeMap :: forall k c.
TokId
-> Map c [RToken c] -> [RToken c] -> IntMap k -> TokenizeMap k c
TokenizeMap
{ $sel:tokCount:TokenizeMap :: TokId
tokCount = TokId
tokCount' TokId -> TokId -> TokId
forall a. Num a => a -> a -> a
+ TokId
tokCount'',
$sel:charTokMap:TokenizeMap :: Map c [RToken c]
charTokMap = ([RToken c] -> [RToken c] -> [RToken c])
-> Map c [RToken c] -> Map c [RToken c] -> Map c [RToken c]
forall k a. Ord k => (a -> a -> a) -> Map k a -> Map k a -> Map k a
M.unionWith [RToken c] -> [RToken c] -> [RToken c]
forall a. Semigroup a => a -> a -> a
(<>) Map c [RToken c]
tokMap' Map c [RToken c]
tokMap''',
$sel:blackToks:TokenizeMap :: [RToken c]
blackToks = [RToken c]
blackToks' [RToken c] -> [RToken c] -> [RToken c]
forall a. Semigroup a => a -> a -> a
<> [RToken c]
blackToks''',
$sel:tokNames:TokenizeMap :: IntMap k
tokNames = IntMap k
tokNames' IntMap k -> IntMap k -> IntMap k
forall a. Semigroup a => a -> a -> a
<> IntMap k
tokNames'''
}
where
tokMap''' :: Map c [RToken c]
tokMap''' = (RToken c -> RToken c) -> [RToken c] -> [RToken c]
forall a b. (a -> b) -> [a] -> [b]
map ((TokId -> TokId) -> RToken c -> RToken c
forall c. (TokId -> TokId) -> RToken c -> RToken c
modifyId (TokId -> TokId -> TokId
forall a. Num a => a -> a -> a
+ TokId
tokCount')) ([RToken c] -> [RToken c]) -> Map c [RToken c] -> Map c [RToken c]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Map c [RToken c]
tokMap''
blackToks''' :: [RToken c]
blackToks''' = (RToken c -> RToken c) -> [RToken c] -> [RToken c]
forall a b. (a -> b) -> [a] -> [b]
map ((TokId -> TokId) -> RToken c -> RToken c
forall c. (TokId -> TokId) -> RToken c -> RToken c
modifyId (TokId -> TokId -> TokId
forall a. Num a => a -> a -> a
+ TokId
tokCount')) [RToken c]
blackToks''
tokNames''' :: IntMap k
tokNames''' = (TokId -> TokId) -> IntMap k -> IntMap k
forall a. (TokId -> TokId) -> IntMap a -> IntMap a
IM.mapKeysMonotonic (TokId -> TokId -> TokId
forall a. Num a => a -> a -> a
+ TokId
tokCount') IntMap k
tokNames''
instance Ord c => Monoid (TokenizeMap k c) where
mempty :: TokenizeMap k c
mempty = TokId
-> Map c [RToken c] -> [RToken c] -> IntMap k -> TokenizeMap k c
forall k c.
TokId
-> Map c [RToken c] -> [RToken c] -> IntMap k -> TokenizeMap k c
TokenizeMap TokId
0 Map c [RToken c]
forall a. Monoid a => a
mempty [RToken c]
forall a. Monoid a => a
mempty IntMap k
forall a. Monoid a => a
mempty
singleTokMap :: Ord c => Token k c -> TokenizeMap k c
singleTokMap :: Token k c -> TokenizeMap k c
singleTokMap tok :: Token k c
tok@Token {k
$sel:name:Token :: forall k c. Token k c -> k
name :: k
name, [Repeatable c]
$sel:body:Token :: forall k c. Token k c -> [Repeatable c]
body :: [Repeatable c]
body} =
TokenizeMap :: forall k c.
TokId
-> Map c [RToken c] -> [RToken c] -> IntMap k -> TokenizeMap k c
TokenizeMap
{ $sel:tokCount:TokenizeMap :: TokId
tokCount = TokId
1,
$sel:charTokMap:TokenizeMap :: Map c [RToken c]
charTokMap = case BlackWhiteSet c
bws of
BlackSet Set c
_ -> Map c [RToken c]
forall a. Monoid a => a
mempty
WhiteSet Set c
s -> [(c, [RToken c])] -> Map c [RToken c]
forall k a. Eq k => [(k, a)] -> Map k a
M.fromAscList ([(c, [RToken c])] -> Map c [RToken c])
-> (Set c -> [(c, [RToken c])]) -> Set c -> Map c [RToken c]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (c -> (c, [RToken c])) -> [c] -> [(c, [RToken c])]
forall a b. (a -> b) -> [a] -> [b]
map (, [RToken c
rtok]) ([c] -> [(c, [RToken c])])
-> (Set c -> [c]) -> Set c -> [(c, [RToken c])]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Set c -> [c]
forall a. Set a -> [a]
S.toList (Set c -> Map c [RToken c]) -> Set c -> Map c [RToken c]
forall a b. (a -> b) -> a -> b
$ Set c
s,
$sel:blackToks:TokenizeMap :: [RToken c]
blackToks = case BlackWhiteSet c
bws of
BlackSet Set c
_ -> [RToken c
rtok]
WhiteSet Set c
_ -> [RToken c]
forall a. Monoid a => a
mempty,
$sel:tokNames:TokenizeMap :: IntMap k
tokNames = TokId -> k -> IntMap k
forall a. TokId -> a -> IntMap a
IM.singleton TokId
tokId k
name
}
where
tokId :: TokId
tokId = TokId
0
bws :: BlackWhiteSet c
bws = Repeatable c -> BlackWhiteSet c
forall c. Repeatable c -> BlackWhiteSet c
getBWS (Repeatable c -> BlackWhiteSet c)
-> Repeatable c -> BlackWhiteSet c
forall a b. (a -> b) -> a -> b
$ [Repeatable c] -> Repeatable c
forall a. [a] -> a
head [Repeatable c]
body
rtok :: RToken c
rtok = TokId -> Token k c -> RToken c
forall k c. TokId -> Token k c -> RToken c
makeRToken TokId
tokId Token k c
tok
insert :: Ord c => Token k c -> TokenizeMap k c -> TokenizeMap k c
insert :: Token k c -> TokenizeMap k c -> TokenizeMap k c
insert Token k c
tok = (TokenizeMap k c -> TokenizeMap k c -> TokenizeMap k c
forall a. Semigroup a => a -> a -> a
<> Token k c -> TokenizeMap k c
forall c k. Ord c => Token k c -> TokenizeMap k c
singleTokMap Token k c
tok)
makeTokenizeMap :: Ord c => [Token k c] -> TokenizeMap k c
makeTokenizeMap :: [Token k c] -> TokenizeMap k c
makeTokenizeMap = (Token k c -> TokenizeMap k c -> TokenizeMap k c)
-> TokenizeMap k c -> [Token k c] -> TokenizeMap k c
forall (t :: * -> *) a b.
Foldable t =>
(a -> b -> b) -> b -> t a -> b
foldr Token k c -> TokenizeMap k c -> TokenizeMap k c
forall c k.
Ord c =>
Token k c -> TokenizeMap k c -> TokenizeMap k c
insert TokenizeMap k c
forall a. Monoid a => a
mempty
data TokenizeError k c
= NoWayTokenize
Int
[(k, [c])]
| TwoWaysTokenize
Int
[(k, [c])]
[(k, [c])]
deriving (TokId -> TokenizeError k c -> ShowS
[TokenizeError k c] -> ShowS
TokenizeError k c -> String
(TokId -> TokenizeError k c -> ShowS)
-> (TokenizeError k c -> String)
-> ([TokenizeError k c] -> ShowS)
-> Show (TokenizeError k c)
forall a.
(TokId -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
forall k c. (Show k, Show c) => TokId -> TokenizeError k c -> ShowS
forall k c. (Show k, Show c) => [TokenizeError k c] -> ShowS
forall k c. (Show k, Show c) => TokenizeError k c -> String
showList :: [TokenizeError k c] -> ShowS
$cshowList :: forall k c. (Show k, Show c) => [TokenizeError k c] -> ShowS
show :: TokenizeError k c -> String
$cshow :: forall k c. (Show k, Show c) => TokenizeError k c -> String
showsPrec :: TokId -> TokenizeError k c -> ShowS
$cshowsPrec :: forall k c. (Show k, Show c) => TokId -> TokenizeError k c -> ShowS
Show, TokenizeError k c -> TokenizeError k c -> Bool
(TokenizeError k c -> TokenizeError k c -> Bool)
-> (TokenizeError k c -> TokenizeError k c -> Bool)
-> Eq (TokenizeError k c)
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
forall k c.
(Eq k, Eq c) =>
TokenizeError k c -> TokenizeError k c -> Bool
/= :: TokenizeError k c -> TokenizeError k c -> Bool
$c/= :: forall k c.
(Eq k, Eq c) =>
TokenizeError k c -> TokenizeError k c -> Bool
== :: TokenizeError k c -> TokenizeError k c -> Bool
$c== :: forall k c.
(Eq k, Eq c) =>
TokenizeError k c -> TokenizeError k c -> Bool
Eq)
mapTokErrKey :: (k -> k') -> TokenizeError k c -> TokenizeError k' c
mapTokErrKey :: (k -> k') -> TokenizeError k c -> TokenizeError k' c
mapTokErrKey k -> k'
f (NoWayTokenize TokId
pos [(k, [c])]
toks) =
TokId -> [(k', [c])] -> TokenizeError k' c
forall k c. TokId -> [(k, [c])] -> TokenizeError k c
NoWayTokenize TokId
pos (((k, [c]) -> (k', [c])) -> [(k, [c])] -> [(k', [c])]
forall a b. (a -> b) -> [a] -> [b]
map ((k -> k') -> (k, [c]) -> (k', [c])
forall (p :: * -> * -> *) a b c.
Bifunctor p =>
(a -> b) -> p a c -> p b c
first k -> k'
f) [(k, [c])]
toks)
mapTokErrKey k -> k'
f (TwoWaysTokenize TokId
pos [(k, [c])]
toks [(k, [c])]
toks') =
TokId -> [(k', [c])] -> [(k', [c])] -> TokenizeError k' c
forall k c. TokId -> [(k, [c])] -> [(k, [c])] -> TokenizeError k c
TwoWaysTokenize TokId
pos (((k, [c]) -> (k', [c])) -> [(k, [c])] -> [(k', [c])]
forall a b. (a -> b) -> [a] -> [b]
map ((k -> k') -> (k, [c]) -> (k', [c])
forall (p :: * -> * -> *) a b c.
Bifunctor p =>
(a -> b) -> p a c -> p b c
first k -> k'
f) [(k, [c])]
toks) (((k, [c]) -> (k', [c])) -> [(k, [c])] -> [(k', [c])]
forall a b. (a -> b) -> [a] -> [b]
map ((k -> k') -> (k, [c]) -> (k', [c])
forall (p :: * -> * -> *) a b c.
Bifunctor p =>
(a -> b) -> p a c -> p b c
first k -> k'
f) [(k, [c])]
toks')
tokenize :: forall k c. Ord c => TokenizeMap k c -> [c] -> Either (TokenizeError k c) [(k, [c])]
tokenize :: TokenizeMap k c -> [c] -> Either (TokenizeError k c) [(k, [c])]
tokenize TokenizeMap {Map c [RToken c]
charTokMap :: Map c [RToken c]
$sel:charTokMap:TokenizeMap :: forall k c. TokenizeMap k c -> Map c [RToken c]
charTokMap, [RToken c]
blackToks :: [RToken c]
$sel:blackToks:TokenizeMap :: forall k c. TokenizeMap k c -> [RToken c]
blackToks, IntMap k
tokNames :: IntMap k
$sel:tokNames:TokenizeMap :: forall k c. TokenizeMap k c -> IntMap k
tokNames} [c]
cs =
(TokenizeError TokId c -> TokenizeError k c)
-> ([(TokId, [c])] -> [(k, [c])])
-> Either (TokenizeError TokId c) [(TokId, [c])]
-> Either (TokenizeError k c) [(k, [c])]
forall (p :: * -> * -> *) a b c d.
Bifunctor p =>
(a -> b) -> (c -> d) -> p a c -> p b d
bimap TokenizeError TokId c -> TokenizeError k c
nameTokErr [(TokId, [c])] -> [(k, [c])]
nameTokRes (Either (TokenizeError TokId c) [(TokId, [c])]
-> Either (TokenizeError k c) [(k, [c])])
-> Either (TokenizeError TokId c) [(TokId, [c])]
-> Either (TokenizeError k c) [(k, [c])]
forall a b. (a -> b) -> a -> b
$ (State
(IntMap (Either (TokenizeError TokId c) [(TokId, [c])]))
(Either (TokenizeError TokId c) [(TokId, [c])])
-> IntMap (Either (TokenizeError TokId c) [(TokId, [c])])
-> Either (TokenizeError TokId c) [(TokId, [c])])
-> IntMap (Either (TokenizeError TokId c) [(TokId, [c])])
-> State
(IntMap (Either (TokenizeError TokId c) [(TokId, [c])]))
(Either (TokenizeError TokId c) [(TokId, [c])])
-> Either (TokenizeError TokId c) [(TokId, [c])]
forall a b c. (a -> b -> c) -> b -> a -> c
flip State
(IntMap (Either (TokenizeError TokId c) [(TokId, [c])]))
(Either (TokenizeError TokId c) [(TokId, [c])])
-> IntMap (Either (TokenizeError TokId c) [(TokId, [c])])
-> Either (TokenizeError TokId c) [(TokId, [c])]
forall s a. State s a -> s -> a
evalState IntMap (Either (TokenizeError TokId c) [(TokId, [c])])
forall a. Monoid a => a
mempty (State
(IntMap (Either (TokenizeError TokId c) [(TokId, [c])]))
(Either (TokenizeError TokId c) [(TokId, [c])])
-> Either (TokenizeError TokId c) [(TokId, [c])])
-> State
(IntMap (Either (TokenizeError TokId c) [(TokId, [c])]))
(Either (TokenizeError TokId c) [(TokId, [c])])
-> Either (TokenizeError TokId c) [(TokId, [c])]
forall a b. (a -> b) -> a -> b
$ TokId
-> [c]
-> [c]
-> State
(IntMap (Either (TokenizeError TokId c) [(TokId, [c])]))
(Either (TokenizeError TokId c) [(TokId, [c])])
h TokId
0 [] [c]
cs
where
nameTokErr :: TokenizeError TokId c -> TokenizeError k c
nameTokErr :: TokenizeError TokId c -> TokenizeError k c
nameTokErr = (TokId -> k) -> TokenizeError TokId c -> TokenizeError k c
forall k k' c. (k -> k') -> TokenizeError k c -> TokenizeError k' c
mapTokErrKey (IntMap k
tokNames IntMap k -> TokId -> k
forall a. IntMap a -> TokId -> a
IM.!)
nameTokRes :: [(TokId, [c])] -> [(k, [c])]
nameTokRes :: [(TokId, [c])] -> [(k, [c])]
nameTokRes = ((TokId, [c]) -> (k, [c])) -> [(TokId, [c])] -> [(k, [c])]
forall a b. (a -> b) -> [a] -> [b]
map (((TokId, [c]) -> (k, [c])) -> [(TokId, [c])] -> [(k, [c])])
-> ((TokId, [c]) -> (k, [c])) -> [(TokId, [c])] -> [(k, [c])]
forall a b. (a -> b) -> a -> b
$ (TokId -> k) -> (TokId, [c]) -> (k, [c])
forall (p :: * -> * -> *) a b c.
Bifunctor p =>
(a -> b) -> p a c -> p b c
first (IntMap k
tokNames IntMap k -> TokId -> k
forall a. IntMap a -> TokId -> a
IM.!)
h :: Int -> [c] -> [c] -> State (IntMap (Res c)) (Res c)
h :: TokId
-> [c]
-> [c]
-> State
(IntMap (Either (TokenizeError TokId c) [(TokId, [c])]))
(Either (TokenizeError TokId c) [(TokId, [c])])
h TokId
_ [c]
_ [] = Either (TokenizeError TokId c) [(TokId, [c])]
-> State
(IntMap (Either (TokenizeError TokId c) [(TokId, [c])]))
(Either (TokenizeError TokId c) [(TokId, [c])])
forall (f :: * -> *) a. Applicative f => a -> f a
pure (Either (TokenizeError TokId c) [(TokId, [c])]
-> State
(IntMap (Either (TokenizeError TokId c) [(TokId, [c])]))
(Either (TokenizeError TokId c) [(TokId, [c])]))
-> Either (TokenizeError TokId c) [(TokId, [c])]
-> State
(IntMap (Either (TokenizeError TokId c) [(TokId, [c])]))
(Either (TokenizeError TokId c) [(TokId, [c])])
forall a b. (a -> b) -> a -> b
$ [(TokId, [c])] -> Either (TokenizeError TokId c) [(TokId, [c])]
forall a b. b -> Either a b
Right []
h TokId
pos [c]
prevs nexts :: [c]
nexts@(c
cur : [c]
_) = do
Maybe (Either (TokenizeError TokId c) [(TokId, [c])])
mres <- (IntMap (Either (TokenizeError TokId c) [(TokId, [c])])
-> Maybe (Either (TokenizeError TokId c) [(TokId, [c])]))
-> StateT
(IntMap (Either (TokenizeError TokId c) [(TokId, [c])]))
Identity
(Maybe (Either (TokenizeError TokId c) [(TokId, [c])]))
forall (m :: * -> *) s a. Monad m => (s -> a) -> StateT s m a
gets ((IntMap (Either (TokenizeError TokId c) [(TokId, [c])])
-> Maybe (Either (TokenizeError TokId c) [(TokId, [c])]))
-> StateT
(IntMap (Either (TokenizeError TokId c) [(TokId, [c])]))
Identity
(Maybe (Either (TokenizeError TokId c) [(TokId, [c])])))
-> (IntMap (Either (TokenizeError TokId c) [(TokId, [c])])
-> Maybe (Either (TokenizeError TokId c) [(TokId, [c])]))
-> StateT
(IntMap (Either (TokenizeError TokId c) [(TokId, [c])]))
Identity
(Maybe (Either (TokenizeError TokId c) [(TokId, [c])]))
forall a b. (a -> b) -> a -> b
$ TokId
-> IntMap (Either (TokenizeError TokId c) [(TokId, [c])])
-> Maybe (Either (TokenizeError TokId c) [(TokId, [c])])
forall a. TokId -> IntMap a -> Maybe a
IM.lookup TokId
pos
State
(IntMap (Either (TokenizeError TokId c) [(TokId, [c])]))
(Either (TokenizeError TokId c) [(TokId, [c])])
-> (Either (TokenizeError TokId c) [(TokId, [c])]
-> State
(IntMap (Either (TokenizeError TokId c) [(TokId, [c])]))
(Either (TokenizeError TokId c) [(TokId, [c])]))
-> Maybe (Either (TokenizeError TokId c) [(TokId, [c])])
-> State
(IntMap (Either (TokenizeError TokId c) [(TokId, [c])]))
(Either (TokenizeError TokId c) [(TokId, [c])])
forall b a. b -> (a -> b) -> Maybe a -> b
maybe State
(IntMap (Either (TokenizeError TokId c) [(TokId, [c])]))
(Either (TokenizeError TokId c) [(TokId, [c])])
acceptedToks Either (TokenizeError TokId c) [(TokId, [c])]
-> State
(IntMap (Either (TokenizeError TokId c) [(TokId, [c])]))
(Either (TokenizeError TokId c) [(TokId, [c])])
forall (f :: * -> *) a. Applicative f => a -> f a
pure Maybe (Either (TokenizeError TokId c) [(TokId, [c])])
mres
where
allToks :: [RToken c]
allToks :: [RToken c]
allToks = [RToken c]
blackToks [RToken c] -> [RToken c] -> [RToken c]
forall a. Semigroup a => a -> a -> a
<> [RToken c] -> Maybe [RToken c] -> [RToken c]
forall a. a -> Maybe a -> a
fromMaybe [] (c -> Map c [RToken c] -> Maybe [RToken c]
forall k a. Ord k => k -> Map k a -> Maybe a
M.lookup c
cur Map c [RToken c]
charTokMap)
acceptedToks :: State (IntMap (Res c)) (Res c)
acceptedToks :: State
(IntMap (Either (TokenizeError TokId c) [(TokId, [c])]))
(Either (TokenizeError TokId c) [(TokId, [c])])
acceptedToks =
((TokId, [c], [c])
-> Either (TokenizeError TokId c) [(TokId, [c])]
-> State
(IntMap (Either (TokenizeError TokId c) [(TokId, [c])]))
(Either (TokenizeError TokId c) [(TokId, [c])]))
-> Either (TokenizeError TokId c) [(TokId, [c])]
-> [(TokId, [c], [c])]
-> State
(IntMap (Either (TokenizeError TokId c) [(TokId, [c])]))
(Either (TokenizeError TokId c) [(TokId, [c])])
forall (t :: * -> *) (m :: * -> *) a b.
(Foldable t, Monad m) =>
(a -> b -> m b) -> b -> t a -> m b
foldrM
( \(TokId
tokId, [c]
curs, [c]
nexts') Either (TokenizeError TokId c) [(TokId, [c])]
res'' -> do
let curTok :: (TokId, [c])
curTok = (TokId
tokId, [c]
curs)
Either (TokenizeError TokId c) [(TokId, [c])]
res' <- (TokId, [c])
-> Either (TokenizeError TokId c) [(TokId, [c])]
-> Either (TokenizeError TokId c) [(TokId, [c])]
forall c. (TokId, [c]) -> Res c -> Res c
addTok (TokId, [c])
curTok (Either (TokenizeError TokId c) [(TokId, [c])]
-> Either (TokenizeError TokId c) [(TokId, [c])])
-> State
(IntMap (Either (TokenizeError TokId c) [(TokId, [c])]))
(Either (TokenizeError TokId c) [(TokId, [c])])
-> State
(IntMap (Either (TokenizeError TokId c) [(TokId, [c])]))
(Either (TokenizeError TokId c) [(TokId, [c])])
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$>
TokId
-> [c]
-> [c]
-> State
(IntMap (Either (TokenizeError TokId c) [(TokId, [c])]))
(Either (TokenizeError TokId c) [(TokId, [c])])
h (TokId
pos TokId -> TokId -> TokId
forall a. Num a => a -> a -> a
+ [c] -> TokId
forall (t :: * -> *) a. Foldable t => t a -> TokId
length [c]
curs) ([c] -> [c]
forall a. [a] -> [a]
reverse [c]
curs [c] -> [c] -> [c]
forall a. Semigroup a => a -> a -> a
<> [c]
prevs) [c]
nexts'
Either (TokenizeError TokId c) [(TokId, [c])]
-> State
(IntMap (Either (TokenizeError TokId c) [(TokId, [c])]))
(Either (TokenizeError TokId c) [(TokId, [c])])
forall (f :: * -> *) a. Applicative f => a -> f a
pure (Either (TokenizeError TokId c) [(TokId, [c])]
-> State
(IntMap (Either (TokenizeError TokId c) [(TokId, [c])]))
(Either (TokenizeError TokId c) [(TokId, [c])]))
-> Either (TokenizeError TokId c) [(TokId, [c])]
-> State
(IntMap (Either (TokenizeError TokId c) [(TokId, [c])]))
(Either (TokenizeError TokId c) [(TokId, [c])])
forall a b. (a -> b) -> a -> b
$ case (Either (TokenizeError TokId c) [(TokId, [c])]
res', Either (TokenizeError TokId c) [(TokId, [c])]
res'') of
(Left TwoWaysTokenize {}, Either (TokenizeError TokId c) [(TokId, [c])]
_) -> Either (TokenizeError TokId c) [(TokId, [c])]
res'
(Either (TokenizeError TokId c) [(TokId, [c])]
_, Left TwoWaysTokenize {}) -> Either (TokenizeError TokId c) [(TokId, [c])]
res''
(Left NoWayTokenize {}, Right [(TokId, [c])]
_) -> Either (TokenizeError TokId c) [(TokId, [c])]
res''
(Right [(TokId, [c])]
_, Left NoWayTokenize {}) -> Either (TokenizeError TokId c) [(TokId, [c])]
res'
(Left (NoWayTokenize TokId
l' [(TokId, [c])]
_), Left (NoWayTokenize TokId
l'' [(TokId, [c])]
_)) ->
if TokId
l' TokId -> TokId -> Bool
forall a. Ord a => a -> a -> Bool
> TokId
l'' then Either (TokenizeError TokId c) [(TokId, [c])]
res' else Either (TokenizeError TokId c) [(TokId, [c])]
res''
(Right [(TokId, [c])]
toks', Right [(TokId, [c])]
toks'') ->
TokenizeError TokId c
-> Either (TokenizeError TokId c) [(TokId, [c])]
forall a b. a -> Either a b
Left (TokenizeError TokId c
-> Either (TokenizeError TokId c) [(TokId, [c])])
-> TokenizeError TokId c
-> Either (TokenizeError TokId c) [(TokId, [c])]
forall a b. (a -> b) -> a -> b
$ TokId -> [(TokId, [c])] -> [(TokId, [c])] -> TokenizeError TokId c
forall k c. TokId -> [(k, [c])] -> [(k, [c])] -> TokenizeError k c
TwoWaysTokenize TokId
pos [(TokId, [c])]
toks' [(TokId, [c])]
toks''
)
((TokenizeError TokId c
-> Either (TokenizeError TokId c) [(TokId, [c])]
forall a b. a -> Either a b
Left (TokenizeError TokId c
-> Either (TokenizeError TokId c) [(TokId, [c])])
-> TokenizeError TokId c
-> Either (TokenizeError TokId c) [(TokId, [c])]
forall a b. (a -> b) -> a -> b
$ TokId -> [(TokId, [c])] -> TokenizeError TokId c
forall k c. TokId -> [(k, [c])] -> TokenizeError k c
NoWayTokenize TokId
pos []) :: Res c)
([(TokId, [c], [c])]
-> State
(IntMap (Either (TokenizeError TokId c) [(TokId, [c])]))
(Either (TokenizeError TokId c) [(TokId, [c])]))
-> [(TokId, [c], [c])]
-> State
(IntMap (Either (TokenizeError TokId c) [(TokId, [c])]))
(Either (TokenizeError TokId c) [(TokId, [c])])
forall a b. (a -> b) -> a -> b
$ (RToken c -> [(TokId, [c], [c])])
-> [RToken c] -> [(TokId, [c], [c])]
forall (t :: * -> *) a b. Foldable t => (a -> [b]) -> t a -> [b]
concatMap ((RToken c -> Alt (TokId, [c], [c]))
-> RToken c -> [(TokId, [c], [c])]
coerce ((RToken c -> Alt (TokId, [c], [c]))
-> RToken c -> [(TokId, [c], [c])])
-> (RToken c -> Alt (TokId, [c], [c]))
-> RToken c
-> [(TokId, [c], [c])]
forall a b. (a -> b) -> a -> b
$ [c] -> [c] -> RToken c -> Alt (TokId, [c], [c])
forall c. Ord c => [c] -> [c] -> RToken c -> Alt (TokId, [c], [c])
accepts [c]
prevs [c]
nexts) [RToken c]
allToks
addTok :: (TokId, [c]) -> Res c -> Res c
addTok :: (TokId, [c]) -> Res c -> Res c
addTok (TokId, [c])
tok = \case
Left (NoWayTokenize TokId
pos [(TokId, [c])]
toks) ->
TokenizeError TokId c -> Res c
forall a b. a -> Either a b
Left (TokenizeError TokId c -> Res c) -> TokenizeError TokId c -> Res c
forall a b. (a -> b) -> a -> b
$ TokId -> [(TokId, [c])] -> TokenizeError TokId c
forall k c. TokId -> [(k, [c])] -> TokenizeError k c
NoWayTokenize TokId
pos ((TokId, [c])
tok (TokId, [c]) -> [(TokId, [c])] -> [(TokId, [c])]
forall a. a -> [a] -> [a]
: [(TokId, [c])]
toks)
Left (TwoWaysTokenize TokId
len [(TokId, [c])]
toks [(TokId, [c])]
toks') ->
TokenizeError TokId c -> Res c
forall a b. a -> Either a b
Left (TokenizeError TokId c -> Res c) -> TokenizeError TokId c -> Res c
forall a b. (a -> b) -> a -> b
$ TokId -> [(TokId, [c])] -> [(TokId, [c])] -> TokenizeError TokId c
forall k c. TokId -> [(k, [c])] -> [(k, [c])] -> TokenizeError k c
TwoWaysTokenize TokId
len ((TokId, [c])
tok (TokId, [c]) -> [(TokId, [c])] -> [(TokId, [c])]
forall a. a -> [a] -> [a]
: [(TokId, [c])]
toks) ((TokId, [c])
tok (TokId, [c]) -> [(TokId, [c])] -> [(TokId, [c])]
forall a. a -> [a] -> [a]
: [(TokId, [c])]
toks')
Right [(TokId, [c])]
rs -> [(TokId, [c])] -> Res c
forall a b. b -> Either a b
Right ([(TokId, [c])] -> Res c) -> [(TokId, [c])] -> Res c
forall a b. (a -> b) -> a -> b
$ (TokId, [c])
tok (TokId, [c]) -> [(TokId, [c])] -> [(TokId, [c])]
forall a. a -> [a] -> [a]
: [(TokId, [c])]
rs
accepts :: Ord c => [c] -> [c] -> RToken c -> Alt (TokId, [c], [c])
accepts :: [c] -> [c] -> RToken c -> Alt (TokId, [c], [c])
accepts [c]
rprevs [c]
nexts RToken {TokId
tokId :: TokId
$sel:tokId:RToken :: forall c. RToken c -> TokId
tokId, [Repeatable c]
$sel:rbehind:RToken :: forall c. RToken c -> [Repeatable c]
rbehind :: [Repeatable c]
rbehind, [Repeatable c]
$sel:body:RToken :: forall c. RToken c -> [Repeatable c]
body :: [Repeatable c]
body, [Repeatable c]
$sel:ahead:RToken :: forall c. RToken c -> [Repeatable c]
ahead :: [Repeatable c]
ahead} = do
Bool -> Alt ()
forall (f :: * -> *). Alternative f => Bool -> f ()
guard (Bool -> Alt ()) -> Bool -> Alt ()
forall a b. (a -> b) -> a -> b
$ Bool -> Bool
not (Bool -> Bool)
-> (Alt ([c], Rem c) -> Bool) -> Alt ([c], Rem c) -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Alt ([c], Rem c) -> Bool
forall (t :: * -> *) a. Foldable t => t a -> Bool
null (Alt ([c], Rem c) -> Bool) -> Alt ([c], Rem c) -> Bool
forall a b. (a -> b) -> a -> b
$ [Repeatable c] -> [c] -> Alt ([c], Rem c)
forall c. Ord c => [Repeatable c] -> [c] -> Alt ([c], Rem c)
check [Repeatable c]
rbehind [c]
rprevs
([c]
curs, Rem c
rem) <- [Repeatable c] -> [c] -> Alt ([c], Rem c)
forall c. Ord c => [Repeatable c] -> [c] -> Alt ([c], Rem c)
check [Repeatable c]
body [c]
nexts
case Rem c
rem of
RemRepeatable [Repeatable c]
_ -> Alt (TokId, [c], [c])
forall (f :: * -> *) a. Alternative f => f a
empty
RemString [c]
cs' -> do
Bool -> Alt ()
forall (f :: * -> *). Alternative f => Bool -> f ()
guard (Bool -> Alt ()) -> Bool -> Alt ()
forall a b. (a -> b) -> a -> b
$ Bool -> Bool
not (Bool -> Bool)
-> (Alt ([c], Rem c) -> Bool) -> Alt ([c], Rem c) -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Alt ([c], Rem c) -> Bool
forall (t :: * -> *) a. Foldable t => t a -> Bool
null (Alt ([c], Rem c) -> Bool) -> Alt ([c], Rem c) -> Bool
forall a b. (a -> b) -> a -> b
$ [Repeatable c] -> [c] -> Alt ([c], Rem c)
forall c. Ord c => [Repeatable c] -> [c] -> Alt ([c], Rem c)
check [Repeatable c]
ahead [c]
cs'
(TokId, [c], [c]) -> Alt (TokId, [c], [c])
forall (f :: * -> *) a. Applicative f => a -> f a
pure (TokId
tokId, [c]
curs, [c]
cs')
data Rem c = RemRepeatable [Repeatable c] | RemString [c]
deriving (Rem c -> Rem c -> Bool
(Rem c -> Rem c -> Bool) -> (Rem c -> Rem c -> Bool) -> Eq (Rem c)
forall c. Eq c => Rem c -> Rem c -> Bool
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: Rem c -> Rem c -> Bool
$c/= :: forall c. Eq c => Rem c -> Rem c -> Bool
== :: Rem c -> Rem c -> Bool
$c== :: forall c. Eq c => Rem c -> Rem c -> Bool
Eq, Eq (Rem c)
Eq (Rem c)
-> (Rem c -> Rem c -> Ordering)
-> (Rem c -> Rem c -> Bool)
-> (Rem c -> Rem c -> Bool)
-> (Rem c -> Rem c -> Bool)
-> (Rem c -> Rem c -> Bool)
-> (Rem c -> Rem c -> Rem c)
-> (Rem c -> Rem c -> Rem c)
-> Ord (Rem c)
Rem c -> Rem c -> Bool
Rem c -> Rem c -> Ordering
Rem c -> Rem c -> Rem c
forall a.
Eq a
-> (a -> a -> Ordering)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> a)
-> (a -> a -> a)
-> Ord a
forall c. Ord c => Eq (Rem c)
forall c. Ord c => Rem c -> Rem c -> Bool
forall c. Ord c => Rem c -> Rem c -> Ordering
forall c. Ord c => Rem c -> Rem c -> Rem c
min :: Rem c -> Rem c -> Rem c
$cmin :: forall c. Ord c => Rem c -> Rem c -> Rem c
max :: Rem c -> Rem c -> Rem c
$cmax :: forall c. Ord c => Rem c -> Rem c -> Rem c
>= :: Rem c -> Rem c -> Bool
$c>= :: forall c. Ord c => Rem c -> Rem c -> Bool
> :: Rem c -> Rem c -> Bool
$c> :: forall c. Ord c => Rem c -> Rem c -> Bool
<= :: Rem c -> Rem c -> Bool
$c<= :: forall c. Ord c => Rem c -> Rem c -> Bool
< :: Rem c -> Rem c -> Bool
$c< :: forall c. Ord c => Rem c -> Rem c -> Bool
compare :: Rem c -> Rem c -> Ordering
$ccompare :: forall c. Ord c => Rem c -> Rem c -> Ordering
$cp1Ord :: forall c. Ord c => Eq (Rem c)
Ord, TokId -> Rem c -> ShowS
[Rem c] -> ShowS
Rem c -> String
(TokId -> Rem c -> ShowS)
-> (Rem c -> String) -> ([Rem c] -> ShowS) -> Show (Rem c)
forall c. Show c => TokId -> Rem c -> ShowS
forall c. Show c => [Rem c] -> ShowS
forall c. Show c => Rem c -> String
forall a.
(TokId -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [Rem c] -> ShowS
$cshowList :: forall c. Show c => [Rem c] -> ShowS
show :: Rem c -> String
$cshow :: forall c. Show c => Rem c -> String
showsPrec :: TokId -> Rem c -> ShowS
$cshowsPrec :: forall c. Show c => TokId -> Rem c -> ShowS
Show)
check :: Ord c => [Repeatable c] -> [c] -> Alt ([c], Rem c)
check :: [Repeatable c] -> [c] -> Alt ([c], Rem c)
check [] [c]
cs = ([c], Rem c) -> Alt ([c], Rem c)
forall (f :: * -> *) a. Applicative f => a -> f a
pure ([], [c] -> Rem c
forall c. [c] -> Rem c
RemString [c]
cs)
check [Repeatable c]
rs [] = ([c], Rem c) -> Alt ([c], Rem c)
forall (f :: * -> *) a. Applicative f => a -> f a
pure ([], [Repeatable c] -> Rem c
forall c. [Repeatable c] -> Rem c
RemRepeatable [Repeatable c]
rs)
check rs0 :: [Repeatable c]
rs0@(Repeatable Count
cnt BlackWhiteSet c
bws : [Repeatable c]
rs) (c
c : [c]
cs) = do
Bool -> Alt ()
forall (f :: * -> *). Alternative f => Bool -> f ()
guard (Bool -> Alt ()) -> Bool -> Alt ()
forall a b. (a -> b) -> a -> b
$ c
c c -> BlackWhiteSet c -> Bool
forall c. Ord c => c -> BlackWhiteSet c -> Bool
`BWS.member` BlackWhiteSet c
bws
([c] -> [c]) -> ([c], Rem c) -> ([c], Rem c)
forall (p :: * -> * -> *) a b c.
Bifunctor p =>
(a -> b) -> p a c -> p b c
first (c
c c -> [c] -> [c]
forall a. a -> [a] -> [a]
:) (([c], Rem c) -> ([c], Rem c))
-> Alt ([c], Rem c) -> Alt ([c], Rem c)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> case Count
cnt of
Count
One -> [Repeatable c] -> [c] -> Alt ([c], Rem c)
forall c. Ord c => [Repeatable c] -> [c] -> Alt ([c], Rem c)
check [Repeatable c]
rs [c]
cs
Count
Some -> [Repeatable c] -> [c] -> Alt ([c], Rem c)
forall c. Ord c => [Repeatable c] -> [c] -> Alt ([c], Rem c)
check [Repeatable c]
rs [c]
cs Alt ([c], Rem c) -> Alt ([c], Rem c) -> Alt ([c], Rem c)
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> [Repeatable c] -> [c] -> Alt ([c], Rem c)
forall c. Ord c => [Repeatable c] -> [c] -> Alt ([c], Rem c)
check [Repeatable c]
rs0 [c]
cs
type Res c = Either (TokenizeError TokId c) [(TokId, [c])]