{- |
  Module        : Text.Tokenizer.Uniqueness
  Copyright     : (c) Lev Dvorkin, 2022
  License       : MIT
  Maintainer    : lev_135@mail.ru
  Stability     : Experimental

  This module contains implementation of uniqueness checking algorithm
  based on Sardinas-Patterson's algorithm
-}
module Text.Tokenizer.Uniqueness (
    Rem (..),
    MergeRes (..), mergeReps, mergedList, remList, rem1, rem2,
    Suff (..), Div (..), initDiv, stepDiv,
    ConflictTokens (..), checkUniqueTokenizing
  ) where

import Control.Applicative (Alternative (..))
import Control.Monad (guard)
import Data.Bifunctor (Bifunctor(..))
import qualified Data.Set as S
import Data.Coerce (coerce)

import qualified Text.Tokenizer.BlackWhiteSet as BWS
import Text.Tokenizer.Types
  (Alt (..), getBWS, RToken(..), TokId, Repeatable(..), Token (..), makeRToken, Count (..))

data Rem c
  -- | First list reminder. May be empty if there is no rem

  = Rem1 [Repeatable c]
  -- | Second list reminder. Always is nonempty

  | Rem2 [Repeatable c]

data MergeRes c = MergeRes
  { MergeRes c -> [Repeatable c]
merged :: [Repeatable c],
    MergeRes c -> Rem c
mergeRem   :: Rem c
  }

remList :: MergeRes c -> [Repeatable c]
remList :: MergeRes c -> [Repeatable c]
remList MergeRes{Rem c
mergeRem :: Rem c
$sel:mergeRem:MergeRes :: forall c. MergeRes c -> Rem c
mergeRem} = case Rem c
mergeRem of
  Rem1 [Repeatable c]
res -> [Repeatable c]
res
  Rem2 [Repeatable c]
res -> [Repeatable c]
res

mergedList :: MergeRes c -> [Repeatable c]
mergedList :: MergeRes c -> [Repeatable c]
mergedList m :: MergeRes c
m@MergeRes{[Repeatable c]
merged :: [Repeatable c]
$sel:merged:MergeRes :: forall c. MergeRes c -> [Repeatable c]
merged} = [Repeatable c]
merged [Repeatable c] -> [Repeatable c] -> [Repeatable c]
forall a. Semigroup a => a -> a -> a
<> MergeRes c -> [Repeatable c]
forall c. MergeRes c -> [Repeatable c]
remList MergeRes c
m

rem1 :: MergeRes c -> [Repeatable c]
rem1 :: MergeRes c -> [Repeatable c]
rem1 (MergeRes [Repeatable c]
_ (Rem1 [Repeatable c]
xs)) = [Repeatable c]
xs
rem1 MergeRes c
_ = []

rem2 :: MergeRes c -> [Repeatable c]
rem2 :: MergeRes c -> [Repeatable c]
rem2 (MergeRes [Repeatable c]
_ (Rem2 [Repeatable c]
ys)) = [Repeatable c]
ys
rem2 MergeRes c
_ = []

mergeReps :: (Ord c) => [Repeatable c] -> [Repeatable c] -> Alt (MergeRes c)
mergeReps :: [Repeatable c] -> [Repeatable c] -> Alt (MergeRes c)
mergeReps [Repeatable c]
xs [Repeatable c]
ys = case ([Repeatable c]
xs, [Repeatable c]
ys) of
  ([], []) -> MergeRes c -> Alt (MergeRes c)
forall (f :: * -> *) a. Applicative f => a -> f a
pure MergeRes :: forall c. [Repeatable c] -> Rem c -> MergeRes c
MergeRes {$sel:merged:MergeRes :: [Repeatable c]
merged = [], $sel:mergeRem:MergeRes :: Rem c
mergeRem = [Repeatable c] -> Rem c
forall c. [Repeatable c] -> Rem c
Rem1 []}
  ([Repeatable c]
xs, []) -> MergeRes c -> Alt (MergeRes c)
forall (f :: * -> *) a. Applicative f => a -> f a
pure MergeRes :: forall c. [Repeatable c] -> Rem c -> MergeRes c
MergeRes {$sel:merged:MergeRes :: [Repeatable c]
merged = [], $sel:mergeRem:MergeRes :: Rem c
mergeRem = [Repeatable c] -> Rem c
forall c. [Repeatable c] -> Rem c
Rem1 [Repeatable c]
xs}
  ([], [Repeatable c]
ys) -> MergeRes c -> Alt (MergeRes c)
forall (f :: * -> *) a. Applicative f => a -> f a
pure MergeRes :: forall c. [Repeatable c] -> Rem c -> MergeRes c
MergeRes {$sel:merged:MergeRes :: [Repeatable c]
merged = [], $sel:mergeRem:MergeRes :: Rem c
mergeRem = [Repeatable c] -> Rem c
forall c. [Repeatable c] -> Rem c
Rem2 [Repeatable c]
ys}
  (Repeatable c
x : [Repeatable c]
xs', Repeatable c
y : [Repeatable c]
ys') -> do
    let bws :: BlackWhiteSet c
bws = BlackWhiteSet c -> BlackWhiteSet c -> BlackWhiteSet c
forall c.
Ord c =>
BlackWhiteSet c -> BlackWhiteSet c -> BlackWhiteSet c
BWS.intersection (Repeatable c -> BlackWhiteSet c
forall c. Repeatable c -> BlackWhiteSet c
getBWS Repeatable c
x) (Repeatable c -> BlackWhiteSet c
forall c. Repeatable c -> BlackWhiteSet c
getBWS Repeatable c
y)
    Bool -> Alt ()
forall (f :: * -> *). Alternative f => Bool -> f ()
guard (Bool -> Alt ()) -> Bool -> Alt ()
forall a b. (a -> b) -> a -> b
$ Bool -> Bool
not (Bool -> Bool) -> Bool -> Bool
forall a b. (a -> b) -> a -> b
$ BlackWhiteSet c -> Bool
forall c. BlackWhiteSet c -> Bool
BWS.isEmpty BlackWhiteSet c
bws
    case (Repeatable c -> Count
forall c. Repeatable c -> Count
getCnt Repeatable c
x, Repeatable c -> Count
forall c. Repeatable c -> Count
getCnt Repeatable c
y) of
      (Count
One, Count
One) -> do
        res :: MergeRes c
res@MergeRes{[Repeatable c]
merged :: [Repeatable c]
$sel:merged:MergeRes :: forall c. MergeRes c -> [Repeatable c]
merged} <- [Repeatable c] -> [Repeatable c] -> Alt (MergeRes c)
forall c.
Ord c =>
[Repeatable c] -> [Repeatable c] -> Alt (MergeRes c)
mergeReps [Repeatable c]
xs' [Repeatable c]
ys'
        MergeRes c -> Alt (MergeRes c)
forall (f :: * -> *) a. Applicative f => a -> f a
pure (MergeRes c -> Alt (MergeRes c)) -> MergeRes c -> Alt (MergeRes c)
forall a b. (a -> b) -> a -> b
$ MergeRes c
res{$sel:merged:MergeRes :: [Repeatable c]
merged = Count -> BlackWhiteSet c -> Repeatable c
forall c. Count -> BlackWhiteSet c -> Repeatable c
Repeatable Count
One BlackWhiteSet c
bws Repeatable c -> [Repeatable c] -> [Repeatable c]
forall a. a -> [a] -> [a]
: [Repeatable c]
merged}
      (Count
One, Count
Some) -> do
        res :: MergeRes c
res@MergeRes{[Repeatable c]
merged :: [Repeatable c]
$sel:merged:MergeRes :: forall c. MergeRes c -> [Repeatable c]
merged} <- [Repeatable c] -> [Repeatable c] -> Alt (MergeRes c)
forall c.
Ord c =>
[Repeatable c] -> [Repeatable c] -> Alt (MergeRes c)
mergeReps [Repeatable c]
xs' [Repeatable c]
ys Alt (MergeRes c) -> Alt (MergeRes c) -> Alt (MergeRes c)
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> [Repeatable c] -> [Repeatable c] -> Alt (MergeRes c)
forall c.
Ord c =>
[Repeatable c] -> [Repeatable c] -> Alt (MergeRes c)
mergeReps [Repeatable c]
xs' [Repeatable c]
ys'
        MergeRes c -> Alt (MergeRes c)
forall (f :: * -> *) a. Applicative f => a -> f a
pure (MergeRes c -> Alt (MergeRes c)) -> MergeRes c -> Alt (MergeRes c)
forall a b. (a -> b) -> a -> b
$ MergeRes c
res{$sel:merged:MergeRes :: [Repeatable c]
merged = Count -> BlackWhiteSet c -> Repeatable c
forall c. Count -> BlackWhiteSet c -> Repeatable c
Repeatable Count
One BlackWhiteSet c
bws Repeatable c -> [Repeatable c] -> [Repeatable c]
forall a. a -> [a] -> [a]
: [Repeatable c]
merged}
      (Count
Some, Count
One) -> do
        res :: MergeRes c
res@MergeRes{[Repeatable c]
merged :: [Repeatable c]
$sel:merged:MergeRes :: forall c. MergeRes c -> [Repeatable c]
merged} <- [Repeatable c] -> [Repeatable c] -> Alt (MergeRes c)
forall c.
Ord c =>
[Repeatable c] -> [Repeatable c] -> Alt (MergeRes c)
mergeReps [Repeatable c]
xs [Repeatable c]
ys' Alt (MergeRes c) -> Alt (MergeRes c) -> Alt (MergeRes c)
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> [Repeatable c] -> [Repeatable c] -> Alt (MergeRes c)
forall c.
Ord c =>
[Repeatable c] -> [Repeatable c] -> Alt (MergeRes c)
mergeReps [Repeatable c]
xs' [Repeatable c]
ys'
        MergeRes c -> Alt (MergeRes c)
forall (f :: * -> *) a. Applicative f => a -> f a
pure (MergeRes c -> Alt (MergeRes c)) -> MergeRes c -> Alt (MergeRes c)
forall a b. (a -> b) -> a -> b
$ MergeRes c
res{$sel:merged:MergeRes :: [Repeatable c]
merged = Count -> BlackWhiteSet c -> Repeatable c
forall c. Count -> BlackWhiteSet c -> Repeatable c
Repeatable Count
One BlackWhiteSet c
bws Repeatable c -> [Repeatable c] -> [Repeatable c]
forall a. a -> [a] -> [a]
: [Repeatable c]
merged}
      (Count
Some, Count
Some) -> do
        res :: MergeRes c
res@MergeRes{[Repeatable c]
merged :: [Repeatable c]
$sel:merged:MergeRes :: forall c. MergeRes c -> [Repeatable c]
merged} <-
          [Repeatable c] -> [Repeatable c] -> Alt (MergeRes c)
forall c.
Ord c =>
[Repeatable c] -> [Repeatable c] -> Alt (MergeRes c)
mergeReps [Repeatable c]
xs' [Repeatable c]
ys Alt (MergeRes c) -> Alt (MergeRes c) -> Alt (MergeRes c)
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> [Repeatable c] -> [Repeatable c] -> Alt (MergeRes c)
forall c.
Ord c =>
[Repeatable c] -> [Repeatable c] -> Alt (MergeRes c)
mergeReps [Repeatable c]
xs [Repeatable c]
ys' Alt (MergeRes c) -> Alt (MergeRes c) -> Alt (MergeRes c)
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> [Repeatable c] -> [Repeatable c] -> Alt (MergeRes c)
forall c.
Ord c =>
[Repeatable c] -> [Repeatable c] -> Alt (MergeRes c)
mergeReps [Repeatable c]
xs' [Repeatable c]
ys'
        MergeRes c -> Alt (MergeRes c)
forall (f :: * -> *) a. Applicative f => a -> f a
pure (MergeRes c -> Alt (MergeRes c)) -> MergeRes c -> Alt (MergeRes c)
forall a b. (a -> b) -> a -> b
$ MergeRes c
res{$sel:merged:MergeRes :: [Repeatable c]
merged = Count -> BlackWhiteSet c -> Repeatable c
forall c. Count -> BlackWhiteSet c -> Repeatable c
Repeatable Count
Some BlackWhiteSet c
bws Repeatable c -> [Repeatable c] -> [Repeatable c]
forall a. a -> [a] -> [a]
: [Repeatable c]
merged}

-- | Dangling suffix

data Suff c = Suff
  { -- | Symbols behind suffix. Note that only @maxBehind@ symbols are preserved

    Suff c -> [Repeatable c]
srbeh   :: [Repeatable c],
    -- | Symbols from suffix' body

    Suff c -> [Repeatable c]
scur    :: [Repeatable c],
    -- | Symbols ahead suffix

    Suff c -> [Repeatable c]
sahead  :: [Repeatable c]
  }
  deriving (Suff c -> Suff c -> Bool
(Suff c -> Suff c -> Bool)
-> (Suff c -> Suff c -> Bool) -> Eq (Suff c)
forall c. Eq c => Suff c -> Suff c -> Bool
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: Suff c -> Suff c -> Bool
$c/= :: forall c. Eq c => Suff c -> Suff c -> Bool
== :: Suff c -> Suff c -> Bool
$c== :: forall c. Eq c => Suff c -> Suff c -> Bool
Eq, Eq (Suff c)
Eq (Suff c)
-> (Suff c -> Suff c -> Ordering)
-> (Suff c -> Suff c -> Bool)
-> (Suff c -> Suff c -> Bool)
-> (Suff c -> Suff c -> Bool)
-> (Suff c -> Suff c -> Bool)
-> (Suff c -> Suff c -> Suff c)
-> (Suff c -> Suff c -> Suff c)
-> Ord (Suff c)
Suff c -> Suff c -> Bool
Suff c -> Suff c -> Ordering
Suff c -> Suff c -> Suff c
forall a.
Eq a
-> (a -> a -> Ordering)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> a)
-> (a -> a -> a)
-> Ord a
forall c. Ord c => Eq (Suff c)
forall c. Ord c => Suff c -> Suff c -> Bool
forall c. Ord c => Suff c -> Suff c -> Ordering
forall c. Ord c => Suff c -> Suff c -> Suff c
min :: Suff c -> Suff c -> Suff c
$cmin :: forall c. Ord c => Suff c -> Suff c -> Suff c
max :: Suff c -> Suff c -> Suff c
$cmax :: forall c. Ord c => Suff c -> Suff c -> Suff c
>= :: Suff c -> Suff c -> Bool
$c>= :: forall c. Ord c => Suff c -> Suff c -> Bool
> :: Suff c -> Suff c -> Bool
$c> :: forall c. Ord c => Suff c -> Suff c -> Bool
<= :: Suff c -> Suff c -> Bool
$c<= :: forall c. Ord c => Suff c -> Suff c -> Bool
< :: Suff c -> Suff c -> Bool
$c< :: forall c. Ord c => Suff c -> Suff c -> Bool
compare :: Suff c -> Suff c -> Ordering
$ccompare :: forall c. Ord c => Suff c -> Suff c -> Ordering
$cp1Ord :: forall c. Ord c => Eq (Suff c)
Ord, Int -> Suff c -> ShowS
[Suff c] -> ShowS
Suff c -> String
(Int -> Suff c -> ShowS)
-> (Suff c -> String) -> ([Suff c] -> ShowS) -> Show (Suff c)
forall c. Show c => Int -> Suff c -> ShowS
forall c. Show c => [Suff c] -> ShowS
forall c. Show c => Suff c -> String
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [Suff c] -> ShowS
$cshowList :: forall c. Show c => [Suff c] -> ShowS
show :: Suff c -> String
$cshow :: forall c. Show c => Suff c -> String
showsPrec :: Int -> Suff c -> ShowS
$cshowsPrec :: forall c. Show c => Int -> Suff c -> ShowS
Show)

{- | Result of division.

  It looks like

  >       rtoks       |       lastTok
  > --------|---------|-----------------------|~~~~~
  >     rprefToks        |
  > -----|-----|---------|
  > suff (remained part):
  >               behind |     current        | ahead
  >               -------|====================|~~~~~
-}
data Div c = Div
  { -- | Tokens in main sequence, except last one

    Div c -> [(Int, Int)]
rtoks :: [(TokId, Int)],
    -- | Last token in main sequence

    Div c -> (Int, Int)
lastTok :: (TokId, Int),
    -- | Tokens in alter sequence

    Div c -> [(Int, Int)]
rprefToks :: [(TokId, Int)],
    -- | Processed symbols

    Div c -> [Repeatable c]
processed :: [Repeatable c],
    -- | Remained suffix

    Div c -> Suff c
suff :: Suff c
  }
  deriving (Div c -> Div c -> Bool
(Div c -> Div c -> Bool) -> (Div c -> Div c -> Bool) -> Eq (Div c)
forall c. Eq c => Div c -> Div c -> Bool
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: Div c -> Div c -> Bool
$c/= :: forall c. Eq c => Div c -> Div c -> Bool
== :: Div c -> Div c -> Bool
$c== :: forall c. Eq c => Div c -> Div c -> Bool
Eq, Eq (Div c)
Eq (Div c)
-> (Div c -> Div c -> Ordering)
-> (Div c -> Div c -> Bool)
-> (Div c -> Div c -> Bool)
-> (Div c -> Div c -> Bool)
-> (Div c -> Div c -> Bool)
-> (Div c -> Div c -> Div c)
-> (Div c -> Div c -> Div c)
-> Ord (Div c)
Div c -> Div c -> Bool
Div c -> Div c -> Ordering
Div c -> Div c -> Div c
forall a.
Eq a
-> (a -> a -> Ordering)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> a)
-> (a -> a -> a)
-> Ord a
forall c. Ord c => Eq (Div c)
forall c. Ord c => Div c -> Div c -> Bool
forall c. Ord c => Div c -> Div c -> Ordering
forall c. Ord c => Div c -> Div c -> Div c
min :: Div c -> Div c -> Div c
$cmin :: forall c. Ord c => Div c -> Div c -> Div c
max :: Div c -> Div c -> Div c
$cmax :: forall c. Ord c => Div c -> Div c -> Div c
>= :: Div c -> Div c -> Bool
$c>= :: forall c. Ord c => Div c -> Div c -> Bool
> :: Div c -> Div c -> Bool
$c> :: forall c. Ord c => Div c -> Div c -> Bool
<= :: Div c -> Div c -> Bool
$c<= :: forall c. Ord c => Div c -> Div c -> Bool
< :: Div c -> Div c -> Bool
$c< :: forall c. Ord c => Div c -> Div c -> Bool
compare :: Div c -> Div c -> Ordering
$ccompare :: forall c. Ord c => Div c -> Div c -> Ordering
$cp1Ord :: forall c. Ord c => Eq (Div c)
Ord, Int -> Div c -> ShowS
[Div c] -> ShowS
Div c -> String
(Int -> Div c -> ShowS)
-> (Div c -> String) -> ([Div c] -> ShowS) -> Show (Div c)
forall c. Show c => Int -> Div c -> ShowS
forall c. Show c => [Div c] -> ShowS
forall c. Show c => Div c -> String
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [Div c] -> ShowS
$cshowList :: forall c. Show c => [Div c] -> ShowS
show :: Div c -> String
$cshow :: forall c. Show c => Div c -> String
showsPrec :: Int -> Div c -> ShowS
$cshowsPrec :: forall c. Show c => Int -> Div c -> ShowS
Show)

initDiv :: RToken c -> Div c
initDiv :: RToken c -> Div c
initDiv RToken{Int
$sel:tokId:RToken :: forall c. RToken c -> Int
tokId :: Int
tokId, [Repeatable c]
$sel:body:RToken :: forall c. RToken c -> [Repeatable c]
body :: [Repeatable c]
body, [Repeatable c]
$sel:ahead:RToken :: forall c. RToken c -> [Repeatable c]
ahead :: [Repeatable c]
ahead} =
  Div :: forall c.
[(Int, Int)]
-> (Int, Int) -> [(Int, Int)] -> [Repeatable c] -> Suff c -> Div c
Div {
    $sel:rtoks:Div :: [(Int, Int)]
rtoks = [],
    $sel:lastTok:Div :: (Int, Int)
lastTok = (Int
tokId, Int
0),
    $sel:rprefToks:Div :: [(Int, Int)]
rprefToks = [],
    $sel:suff:Div :: Suff c
suff = Suff :: forall c.
[Repeatable c] -> [Repeatable c] -> [Repeatable c] -> Suff c
Suff {$sel:srbeh:Suff :: [Repeatable c]
srbeh = [], $sel:scur:Suff :: [Repeatable c]
scur = [Repeatable c]
body, $sel:sahead:Suff :: [Repeatable c]
sahead = [Repeatable c]
ahead},
    $sel:processed:Div :: [Repeatable c]
processed = []
  }

{- 1) Current token is smaller then available part:
  old:    srbeh         scur               sahead
      -------------|=====================|~~~~~~~~~~~~
  cur:   rbehind       body         ahead
      -------------|===========|~~~~~~~~~~~~~~~~~~~~~~
  new:           srbeh'           scur'     sahead'
      -------------------------|=========|~~~~~~~~~~~~

   2) Current token is bigger then available part:
  old:    srbeh        scur                 sahead
      -------------|=====================|~~~~~~~~~~~~~~~~~~~~~~
  cur:   rbehind       body                             ahead
      -------------|===============================|~~~~~~~~~~~~
  new:             srbeh'                   scur'    sahead'
      -----------------------------------|=========|~~~~~~~~~~~~
-}
stepDiv :: (Ord c) => Int -> Div c -> RToken c -> Alt (Div c)
stepDiv :: Int -> Div c -> RToken c -> Alt (Div c)
stepDiv
  Int
maxBehind
  Div{[(Int, Int)]
rtoks :: [(Int, Int)]
$sel:rtoks:Div :: forall c. Div c -> [(Int, Int)]
rtoks, [(Int, Int)]
rprefToks :: [(Int, Int)]
$sel:rprefToks:Div :: forall c. Div c -> [(Int, Int)]
rprefToks, (Int, Int)
lastTok :: (Int, Int)
$sel:lastTok:Div :: forall c. Div c -> (Int, Int)
lastTok, $sel:suff:Div :: forall c. Div c -> Suff c
suff = Suff{[Repeatable c]
srbeh :: [Repeatable c]
$sel:srbeh:Suff :: forall c. Suff c -> [Repeatable c]
srbeh, [Repeatable c]
scur :: [Repeatable c]
$sel:scur:Suff :: forall c. Suff c -> [Repeatable c]
scur, [Repeatable c]
sahead :: [Repeatable c]
$sel:sahead:Suff :: forall c. Suff c -> [Repeatable c]
sahead}, [Repeatable c]
processed :: [Repeatable c]
$sel:processed:Div :: forall c. Div c -> [Repeatable c]
processed}
  RToken{Int
tokId :: Int
$sel:tokId:RToken :: forall c. RToken c -> Int
tokId, [Repeatable c]
$sel:rbehind:RToken :: forall c. RToken c -> [Repeatable c]
rbehind :: [Repeatable c]
rbehind, [Repeatable c]
body :: [Repeatable c]
$sel:body:RToken :: forall c. RToken c -> [Repeatable c]
body, [Repeatable c]
ahead :: [Repeatable c]
$sel:ahead:RToken :: forall c. RToken c -> [Repeatable c]
ahead} = do
    [Repeatable c]
rbeh <- MergeRes c -> [Repeatable c]
forall c. MergeRes c -> [Repeatable c]
mergedList (MergeRes c -> [Repeatable c])
-> Alt (MergeRes c) -> Alt [Repeatable c]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> [Repeatable c] -> [Repeatable c] -> Alt (MergeRes c)
forall c.
Ord c =>
[Repeatable c] -> [Repeatable c] -> Alt (MergeRes c)
mergeReps [Repeatable c]
srbeh [Repeatable c]
rbehind
    MergeRes c
cur_body <- [Repeatable c] -> [Repeatable c] -> Alt (MergeRes c)
forall c.
Ord c =>
[Repeatable c] -> [Repeatable c] -> Alt (MergeRes c)
mergeReps [Repeatable c]
scur [Repeatable c]
body
    let proc' :: [Repeatable c]
proc' = [Repeatable c]
processed [Repeatable c] -> [Repeatable c] -> [Repeatable c]
forall a. Semigroup a => a -> a -> a
<> MergeRes c -> [Repeatable c]
forall c. MergeRes c -> [Repeatable c]
merged MergeRes c
cur_body
        srbeh' :: [Repeatable c]
srbeh' = Int -> [Repeatable c] -> [Repeatable c]
forall a. Int -> [a] -> [a]
take Int
maxBehind ([Repeatable c] -> [Repeatable c])
-> [Repeatable c] -> [Repeatable c]
forall a b. (a -> b) -> a -> b
$ [Repeatable c] -> [Repeatable c]
forall a. [a] -> [a]
reverse (MergeRes c -> [Repeatable c]
forall c. MergeRes c -> [Repeatable c]
merged MergeRes c
cur_body) [Repeatable c] -> [Repeatable c] -> [Repeatable c]
forall a. Semigroup a => a -> a -> a
<> [Repeatable c]
rbeh
        len :: Int
len = [Repeatable c] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length (MergeRes c -> [Repeatable c]
forall c. MergeRes c -> [Repeatable c]
merged MergeRes c
cur_body)
    case MergeRes c -> Rem c
forall c. MergeRes c -> Rem c
mergeRem MergeRes c
cur_body of
      --      scur      |  sahead

      -- body   |     ahead

      -- srbeh' | scur' |  sahead'

      Rem1 [Repeatable c]
scurRem -> do
        ([Repeatable c]
scur', [Repeatable c]
sahead') <- do
          MergeRes c
tmp <- [Repeatable c] -> [Repeatable c] -> Alt (MergeRes c)
forall c.
Ord c =>
[Repeatable c] -> [Repeatable c] -> Alt (MergeRes c)
mergeReps [Repeatable c]
scurRem [Repeatable c]
ahead
          let scur' :: [Repeatable c]
scur' = MergeRes c -> [Repeatable c]
forall c. MergeRes c -> [Repeatable c]
merged MergeRes c
tmp [Repeatable c] -> [Repeatable c] -> [Repeatable c]
forall a. Semigroup a => a -> a -> a
<> MergeRes c -> [Repeatable c]
forall c. MergeRes c -> [Repeatable c]
rem1 MergeRes c
tmp
          [Repeatable c]
sahead' <- MergeRes c -> [Repeatable c]
forall c. MergeRes c -> [Repeatable c]
mergedList (MergeRes c -> [Repeatable c])
-> Alt (MergeRes c) -> Alt [Repeatable c]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> [Repeatable c] -> [Repeatable c] -> Alt (MergeRes c)
forall c.
Ord c =>
[Repeatable c] -> [Repeatable c] -> Alt (MergeRes c)
mergeReps [Repeatable c]
sahead (MergeRes c -> [Repeatable c]
forall c. MergeRes c -> [Repeatable c]
rem2 MergeRes c
tmp)
          ([Repeatable c], [Repeatable c])
-> Alt ([Repeatable c], [Repeatable c])
forall (f :: * -> *) a. Applicative f => a -> f a
pure ([Repeatable c]
scur', [Repeatable c]
sahead')
        Div c -> Alt (Div c)
forall (f :: * -> *) a. Applicative f => a -> f a
pure Div :: forall c.
[(Int, Int)]
-> (Int, Int) -> [(Int, Int)] -> [Repeatable c] -> Suff c -> Div c
Div {
            [(Int, Int)]
rtoks :: [(Int, Int)]
$sel:rtoks:Div :: [(Int, Int)]
rtoks,
            $sel:rprefToks:Div :: [(Int, Int)]
rprefToks = (Int
tokId, Int
len) (Int, Int) -> [(Int, Int)] -> [(Int, Int)]
forall a. a -> [a] -> [a]
: [(Int, Int)]
rprefToks,
            $sel:lastTok:Div :: (Int, Int)
lastTok = (Int -> Int) -> (Int, Int) -> (Int, Int)
forall (p :: * -> * -> *) b c a.
Bifunctor p =>
(b -> c) -> p a b -> p a c
second (Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
len) (Int, Int)
lastTok,
            $sel:suff:Div :: Suff c
suff = Suff :: forall c.
[Repeatable c] -> [Repeatable c] -> [Repeatable c] -> Suff c
Suff { $sel:srbeh:Suff :: [Repeatable c]
srbeh = [Repeatable c]
srbeh', $sel:scur:Suff :: [Repeatable c]
scur = [Repeatable c]
scur', $sel:sahead:Suff :: [Repeatable c]
sahead = [Repeatable c]
sahead' },
            $sel:processed:Div :: [Repeatable c]
processed = [Repeatable c]
proc'
          }
      -- scur   |     sahead

      --      body      |  ahead

      -- srbeh' | scur' |  sahead'

      Rem2 [Repeatable c]
bodyRem -> do
        ([Repeatable c]
scur', [Repeatable c]
sahead') <- do
          MergeRes c
tmp <- [Repeatable c] -> [Repeatable c] -> Alt (MergeRes c)
forall c.
Ord c =>
[Repeatable c] -> [Repeatable c] -> Alt (MergeRes c)
mergeReps [Repeatable c]
sahead [Repeatable c]
bodyRem
          let scur' :: [Repeatable c]
scur' = MergeRes c -> [Repeatable c]
forall c. MergeRes c -> [Repeatable c]
merged MergeRes c
tmp [Repeatable c] -> [Repeatable c] -> [Repeatable c]
forall a. Semigroup a => a -> a -> a
<> MergeRes c -> [Repeatable c]
forall c. MergeRes c -> [Repeatable c]
rem2 MergeRes c
tmp
          [Repeatable c]
sahead' <- MergeRes c -> [Repeatable c]
forall c. MergeRes c -> [Repeatable c]
mergedList (MergeRes c -> [Repeatable c])
-> Alt (MergeRes c) -> Alt [Repeatable c]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> [Repeatable c] -> [Repeatable c] -> Alt (MergeRes c)
forall c.
Ord c =>
[Repeatable c] -> [Repeatable c] -> Alt (MergeRes c)
mergeReps (MergeRes c -> [Repeatable c]
forall c. MergeRes c -> [Repeatable c]
rem1 MergeRes c
tmp) [Repeatable c]
ahead
          ([Repeatable c], [Repeatable c])
-> Alt ([Repeatable c], [Repeatable c])
forall (f :: * -> *) a. Applicative f => a -> f a
pure ([Repeatable c]
scur', [Repeatable c]
sahead')
        Div c -> Alt (Div c)
forall (f :: * -> *) a. Applicative f => a -> f a
pure Div :: forall c.
[(Int, Int)]
-> (Int, Int) -> [(Int, Int)] -> [Repeatable c] -> Suff c -> Div c
Div {
            $sel:rtoks:Div :: [(Int, Int)]
rtoks = [(Int, Int)]
rprefToks,
            $sel:rprefToks:Div :: [(Int, Int)]
rprefToks = (Int -> Int) -> (Int, Int) -> (Int, Int)
forall (p :: * -> * -> *) b c a.
Bifunctor p =>
(b -> c) -> p a b -> p a c
second (Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
len) (Int, Int)
lastTok (Int, Int) -> [(Int, Int)] -> [(Int, Int)]
forall a. a -> [a] -> [a]
: [(Int, Int)]
rtoks,
            $sel:lastTok:Div :: (Int, Int)
lastTok = (Int
tokId, Int
len),
            $sel:suff:Div :: Suff c
suff = Suff :: forall c.
[Repeatable c] -> [Repeatable c] -> [Repeatable c] -> Suff c
Suff {$sel:srbeh:Suff :: [Repeatable c]
srbeh = [Repeatable c]
srbeh', $sel:scur:Suff :: [Repeatable c]
scur = [Repeatable c]
scur', $sel:sahead:Suff :: [Repeatable c]
sahead = [Repeatable c]
sahead'},
            $sel:processed:Div :: [Repeatable c]
processed = [Repeatable c]
proc'
          }

-- | Two ways of tokenizing a string, demonstrating non-uniqueness

data ConflictTokens k c = ConflictTokens {
    ConflictTokens k c -> [(k, [BlackWhiteSet c])]
tokList1, ConflictTokens k c -> [(k, [BlackWhiteSet c])]
tokList2 :: [(k, [BWS.BlackWhiteSet c])]
  } deriving (Int -> ConflictTokens k c -> ShowS
[ConflictTokens k c] -> ShowS
ConflictTokens k c -> String
(Int -> ConflictTokens k c -> ShowS)
-> (ConflictTokens k c -> String)
-> ([ConflictTokens k c] -> ShowS)
-> Show (ConflictTokens k c)
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
forall k c. (Show k, Show c) => Int -> ConflictTokens k c -> ShowS
forall k c. (Show k, Show c) => [ConflictTokens k c] -> ShowS
forall k c. (Show k, Show c) => ConflictTokens k c -> String
showList :: [ConflictTokens k c] -> ShowS
$cshowList :: forall k c. (Show k, Show c) => [ConflictTokens k c] -> ShowS
show :: ConflictTokens k c -> String
$cshow :: forall k c. (Show k, Show c) => ConflictTokens k c -> String
showsPrec :: Int -> ConflictTokens k c -> ShowS
$cshowsPrec :: forall k c. (Show k, Show c) => Int -> ConflictTokens k c -> ShowS
Show, ConflictTokens k c -> ConflictTokens k c -> Bool
(ConflictTokens k c -> ConflictTokens k c -> Bool)
-> (ConflictTokens k c -> ConflictTokens k c -> Bool)
-> Eq (ConflictTokens k c)
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
forall k c.
(Eq k, Eq c) =>
ConflictTokens k c -> ConflictTokens k c -> Bool
/= :: ConflictTokens k c -> ConflictTokens k c -> Bool
$c/= :: forall k c.
(Eq k, Eq c) =>
ConflictTokens k c -> ConflictTokens k c -> Bool
== :: ConflictTokens k c -> ConflictTokens k c -> Bool
$c== :: forall k c.
(Eq k, Eq c) =>
ConflictTokens k c -> ConflictTokens k c -> Bool
Eq, Eq (ConflictTokens k c)
Eq (ConflictTokens k c)
-> (ConflictTokens k c -> ConflictTokens k c -> Ordering)
-> (ConflictTokens k c -> ConflictTokens k c -> Bool)
-> (ConflictTokens k c -> ConflictTokens k c -> Bool)
-> (ConflictTokens k c -> ConflictTokens k c -> Bool)
-> (ConflictTokens k c -> ConflictTokens k c -> Bool)
-> (ConflictTokens k c -> ConflictTokens k c -> ConflictTokens k c)
-> (ConflictTokens k c -> ConflictTokens k c -> ConflictTokens k c)
-> Ord (ConflictTokens k c)
ConflictTokens k c -> ConflictTokens k c -> Bool
ConflictTokens k c -> ConflictTokens k c -> Ordering
ConflictTokens k c -> ConflictTokens k c -> ConflictTokens k c
forall a.
Eq a
-> (a -> a -> Ordering)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> a)
-> (a -> a -> a)
-> Ord a
forall k c. (Ord k, Ord c) => Eq (ConflictTokens k c)
forall k c.
(Ord k, Ord c) =>
ConflictTokens k c -> ConflictTokens k c -> Bool
forall k c.
(Ord k, Ord c) =>
ConflictTokens k c -> ConflictTokens k c -> Ordering
forall k c.
(Ord k, Ord c) =>
ConflictTokens k c -> ConflictTokens k c -> ConflictTokens k c
min :: ConflictTokens k c -> ConflictTokens k c -> ConflictTokens k c
$cmin :: forall k c.
(Ord k, Ord c) =>
ConflictTokens k c -> ConflictTokens k c -> ConflictTokens k c
max :: ConflictTokens k c -> ConflictTokens k c -> ConflictTokens k c
$cmax :: forall k c.
(Ord k, Ord c) =>
ConflictTokens k c -> ConflictTokens k c -> ConflictTokens k c
>= :: ConflictTokens k c -> ConflictTokens k c -> Bool
$c>= :: forall k c.
(Ord k, Ord c) =>
ConflictTokens k c -> ConflictTokens k c -> Bool
> :: ConflictTokens k c -> ConflictTokens k c -> Bool
$c> :: forall k c.
(Ord k, Ord c) =>
ConflictTokens k c -> ConflictTokens k c -> Bool
<= :: ConflictTokens k c -> ConflictTokens k c -> Bool
$c<= :: forall k c.
(Ord k, Ord c) =>
ConflictTokens k c -> ConflictTokens k c -> Bool
< :: ConflictTokens k c -> ConflictTokens k c -> Bool
$c< :: forall k c.
(Ord k, Ord c) =>
ConflictTokens k c -> ConflictTokens k c -> Bool
compare :: ConflictTokens k c -> ConflictTokens k c -> Ordering
$ccompare :: forall k c.
(Ord k, Ord c) =>
ConflictTokens k c -> ConflictTokens k c -> Ordering
$cp1Ord :: forall k c. (Ord k, Ord c) => Eq (ConflictTokens k c)
Ord)

-- | Check that there is no list of symbols, that can be decomposed to ways

-- on the tokens from given list

checkUniqueTokenizing :: forall k c. (Ord c) =>
  [Token k c] -> Either (ConflictTokens k c) ()
checkUniqueTokenizing :: [Token k c] -> Either (ConflictTokens k c) ()
checkUniqueTokenizing [Token k c]
toks = do
  (Div c -> Either (ConflictTokens k c) ())
-> Alt (Div c) -> Either (ConflictTokens k c) ()
forall (t :: * -> *) (m :: * -> *) a b.
(Foldable t, Monad m) =>
(a -> m b) -> t a -> m ()
mapM_ (Set (Suff c) -> Div c -> Either (ConflictTokens k c) ()
h Set (Suff c)
forall a. Set a
S.empty)
    [Div c
res | RToken c
p <- Alt (RToken c)
allRToks,
           RToken c
p' <- Alt (RToken c)
allRToks,
           Div c
res <- Int -> Div c -> RToken c -> Alt (Div c)
forall c. Ord c => Int -> Div c -> RToken c -> Alt (Div c)
stepDiv Int
maxBehind (RToken c -> Div c
forall c. RToken c -> Div c
initDiv RToken c
p') RToken c
p
    ]
  where
    allRToks :: Alt (RToken c)
allRToks = [RToken c] -> Alt (RToken c)
forall a. [a] -> Alt a
Alt ([RToken c] -> Alt (RToken c)) -> [RToken c] -> Alt (RToken c)
forall a b. (a -> b) -> a -> b
$ (Int -> Token k c -> RToken c)
-> [Int] -> [Token k c] -> [RToken c]
forall a b c. (a -> b -> c) -> [a] -> [b] -> [c]
zipWith Int -> Token k c -> RToken c
forall k c. Int -> Token k c -> RToken c
makeRToken [Int
0 ..] [Token k c]
toks
    maxBehind :: Int
maxBehind = [Int] -> Int
forall (t :: * -> *) a. (Foldable t, Ord a) => t a -> a
maximum ([Int] -> Int) -> [Int] -> Int
forall a b. (a -> b) -> a -> b
$ (\Token {[BlackWhiteSet c]
$sel:behind:Token :: forall k c. Token k c -> [BlackWhiteSet c]
behind :: [BlackWhiteSet c]
behind} -> [BlackWhiteSet c] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length [BlackWhiteSet c]
behind) (Token k c -> Int) -> [Token k c] -> [Int]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> [Token k c]
toks
    h :: S.Set (Suff c) -> Div c -> Either (ConflictTokens k c) ()
    h :: Set (Suff c) -> Div c -> Either (ConflictTokens k c) ()
h Set (Suff c)
olds curDiv :: Div c
curDiv@Div{[(Int, Int)]
rprefToks :: [(Int, Int)]
$sel:rprefToks:Div :: forall c. Div c -> [(Int, Int)]
rprefToks, [Repeatable c]
processed :: [Repeatable c]
$sel:processed:Div :: forall c. Div c -> [Repeatable c]
processed, (Int, Int)
lastTok :: (Int, Int)
$sel:lastTok:Div :: forall c. Div c -> (Int, Int)
lastTok, [(Int, Int)]
rtoks :: [(Int, Int)]
$sel:rtoks:Div :: forall c. Div c -> [(Int, Int)]
rtoks, $sel:suff:Div :: forall c. Div c -> Suff c
suff = suff :: Suff c
suff@Suff{[Repeatable c]
scur :: [Repeatable c]
$sel:scur:Suff :: forall c. Suff c -> [Repeatable c]
scur}} = do
      if [Repeatable c] -> Bool
forall (t :: * -> *) a. Foldable t => t a -> Bool
null [Repeatable c]
scur
      then
        case ([(Int, Int)]
rtoks, [(Int, Int)]
rprefToks) of
          ([], [(Int, Int)
tok]) | (Int, Int) -> Int
forall a b. (a, b) -> a
fst (Int, Int)
tok Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== (Int, Int) -> Int
forall a b. (a, b) -> a
fst (Int, Int)
lastTok -> () -> Either (ConflictTokens k c) ()
forall (f :: * -> *) a. Applicative f => a -> f a
pure ()
          ([(Int, Int)], [(Int, Int)])
_ -> ConflictTokens k c -> Either (ConflictTokens k c) ()
forall a b. a -> Either a b
Left ConflictTokens :: forall k c.
[(k, [BlackWhiteSet c])]
-> [(k, [BlackWhiteSet c])] -> ConflictTokens k c
ConflictTokens {
                  $sel:tokList1:ConflictTokens :: [(k, [BlackWhiteSet c])]
tokList1 = [(Int, Int)] -> [BlackWhiteSet c] -> [(k, [BlackWhiteSet c])]
hh ([(Int, Int)] -> [(Int, Int)]
forall a. [a] -> [a]
reverse [(Int, Int)]
rprefToks) ([BlackWhiteSet c] -> [(k, [BlackWhiteSet c])])
-> [BlackWhiteSet c] -> [(k, [BlackWhiteSet c])]
forall a b. (a -> b) -> a -> b
$ Repeatable c -> BlackWhiteSet c
forall c. Repeatable c -> BlackWhiteSet c
getBWS (Repeatable c -> BlackWhiteSet c)
-> [Repeatable c] -> [BlackWhiteSet c]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> [Repeatable c]
processed,
                  $sel:tokList2:ConflictTokens :: [(k, [BlackWhiteSet c])]
tokList2 = [(Int, Int)] -> [BlackWhiteSet c] -> [(k, [BlackWhiteSet c])]
hh ([(Int, Int)] -> [(Int, Int)]
forall a. [a] -> [a]
reverse ((Int, Int)
lastTok (Int, Int) -> [(Int, Int)] -> [(Int, Int)]
forall a. a -> [a] -> [a]
: [(Int, Int)]
rtoks)) ([BlackWhiteSet c] -> [(k, [BlackWhiteSet c])])
-> [BlackWhiteSet c] -> [(k, [BlackWhiteSet c])]
forall a b. (a -> b) -> a -> b
$ Repeatable c -> BlackWhiteSet c
forall c. Repeatable c -> BlackWhiteSet c
getBWS (Repeatable c -> BlackWhiteSet c)
-> [Repeatable c] -> [BlackWhiteSet c]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> [Repeatable c]
processed
                }
      else
        (Div c -> Either (ConflictTokens k c) ())
-> Alt (Div c) -> Either (ConflictTokens k c) ()
forall (t :: * -> *) (m :: * -> *) a b.
(Foldable t, Monad m) =>
(a -> m b) -> t a -> m ()
mapM_ (Set (Suff c) -> Div c -> Either (ConflictTokens k c) ()
h (Set (Suff c) -> Div c -> Either (ConflictTokens k c) ())
-> Set (Suff c) -> Div c -> Either (ConflictTokens k c) ()
forall a b. (a -> b) -> a -> b
$ Suff c -> Set (Suff c) -> Set (Suff c)
forall a. Ord a => a -> Set a -> Set a
S.insert Suff c
suff Set (Suff c)
olds)
          [ Div c
nextDiv | RToken c
tok <- Alt (RToken c)
allRToks,
                      nextDiv :: Div c
nextDiv@Div{$sel:suff:Div :: forall c. Div c -> Suff c
suff = Suff c
nextSuff} <- Int -> Div c -> RToken c -> Alt (Div c)
forall c. Ord c => Int -> Div c -> RToken c -> Alt (Div c)
stepDiv Int
maxBehind Div c
curDiv RToken c
tok,
                      Suff c
nextSuff Suff c -> Set (Suff c) -> Bool
forall a. Ord a => a -> Set a -> Bool
`S.notMember` Set (Suff c)
olds
          ]
    hh :: [(TokId, Int)] -> [BWS.BlackWhiteSet c] -> [(k, [BWS.BlackWhiteSet c])]
    hh :: [(Int, Int)] -> [BlackWhiteSet c] -> [(k, [BlackWhiteSet c])]
hh [] [BlackWhiteSet c]
_ = []
    hh ((Int
tokId, Int
len) : [(Int, Int)]
xs') [BlackWhiteSet c]
bwss = (k
name, [BlackWhiteSet c]
bws) (k, [BlackWhiteSet c])
-> [(k, [BlackWhiteSet c])] -> [(k, [BlackWhiteSet c])]
forall a. a -> [a] -> [a]
: [(Int, Int)] -> [BlackWhiteSet c] -> [(k, [BlackWhiteSet c])]
hh [(Int, Int)]
xs' [BlackWhiteSet c]
bwss'
      where
        ([BlackWhiteSet c]
bws, [BlackWhiteSet c]
bwss') = Int -> [BlackWhiteSet c] -> ([BlackWhiteSet c], [BlackWhiteSet c])
forall a. Int -> [a] -> ([a], [a])
splitAt Int
len [BlackWhiteSet c]
bwss
        Token{k
$sel:name:Token :: forall k c. Token k c -> k
name :: k
name} = [Token k c]
toks [Token k c] -> Int -> Token k c
forall a. [a] -> Int -> a
!! Int -> Int
coerce Int
tokId