-- | Annotate subsequences of a 'Text' string with arbitrary metadata.
module Data.Text.Markup
  ( Markup
  , toMarkup
  , fromMarkup
  , markRegion
  )
where

import qualified Data.Sequence as S
import Data.Monoid ((<>))
import qualified Data.Text as T

data SequenceTree a =
    Node Int Int (S.Seq (SequenceTree a))
    | Leaf Int Int a
    deriving (Show, Eq)

-- | Markup.  This contains text along with markup.
data Markup a =
    Markup { _sourceText :: T.Text
           , _markupMapping :: SequenceTree a
           }
           deriving (Show, Eq)

-- | Convert a 'Text' value into 'Markup' with the accompanying metadata
-- value assigned to the entire 'Text' sequence.
toMarkup :: T.Text -> a -> Markup a
toMarkup t a = Markup t (Leaf 0 (T.length t) a)

-- | Recover the original text along with metadata assigned with
-- 'markRegion'.
fromMarkup :: Markup a -> [(T.Text, a)]
fromMarkup (Markup txt tree) =
    -- Get all leave nodes from the tree in order, then use their
    -- descriptors to chop up the text
    let descs = leaves tree
        initialState :: (T.Text, [(T.Text, a)])
        initialState = (txt, [])
        nextChunk (remainingText, prevChunks) (_, len, val) =
            let (thisText, remainingText') = T.splitAt len remainingText
                thisChunk = (thisText, val)
            in (remainingText', prevChunks <> [thisChunk])
        (_, chunks) = foldl nextChunk initialState descs
    in chunks

-- | Mark a region of text with the specified metadata.
markRegion :: (Eq a)
           => Int
           -- ^ The starting index to mark.
           -> Int
           -- ^ The size of the region to mark.
           -> a
           -- ^ The metadata to store for this region.
           -> Markup a
           -- ^ The markup to modify.
           -> Markup a
markRegion start len val m@(Markup txt t0) =
    if start < 0 || len < 0 then m else Markup txt t1
    where
        t1 = treeMarkRegion start len val t0

-- Need recursive algorithm to rebuild tree with nodes split up as
-- necessary
treeMarkRegion :: (Eq a) => Int -> Int -> a -> SequenceTree a -> SequenceTree a
treeMarkRegion newStart newLen newVal leaf@(Leaf lStart lLen oldVal) =
    if newLen == 0 || not (startInLeaf || endInLeaf || containsLeaf) then leaf
    else if length validLeaves == 1
         then S.index validLeaves 0
         else if S.length validLeaves > 1
              then case mergeNodes validLeaves of
                  Left l -> l
                  Right ls -> Node lStart lLen ls
              else leaf
    where
        end = newStart + newLen
        lEnd = lStart + lLen
        startInLeaf = newStart >= lStart && newStart <= lEnd
        endInLeaf = end >= lStart && end <= lEnd
        containsLeaf = newStart < lStart && newLen > lLen

        -- Clamp the new node leaf to the size of the current leaf since
        -- the request could be larger than this leaf
        newStart' = max lStart newStart
        newEnd = min lEnd (newStart + newLen)
        newLen' = newEnd - newStart'

        newLeaves = S.fromList [ Leaf lStart (newStart - lStart) oldVal
                               , Leaf newStart' newLen' newVal
                               , Leaf newEnd (lEnd - newEnd) oldVal
                               ]
        validLeaves = S.filter isValidLeaf newLeaves
        isValidLeaf (Leaf _ l _) = l > 0
        isValidLeaf _ = error "BUG: isValidLeaf got a Node!"

treeMarkRegion start len newVal node@(Node lStart lLen cs) =
    let end = start + len
        lEnd = lStart + lLen
        startInNode = start >= lStart && start <= lEnd
        endInNode   = end   >= lStart && end   <= lEnd
        containsNode = start < lStart && len > lLen
    -- If the start or end is somewhere in this node, we need to process
    -- the children
    in if startInNode || endInNode || containsNode
       then let newChildren = treeMarkRegion start len newVal <$> cs
            in case mergeNodes newChildren of
                Left single -> single
                Right many -> Node lStart lLen many
       else node

mergeNodes :: (Eq a) => S.Seq (SequenceTree a) -> Either (SequenceTree a) (S.Seq (SequenceTree a))
mergeNodes s
  | S.null s = Right S.empty
  | S.length s == 1 = Left $ S.index s 0
  | otherwise =
      let a = S.index s 0
          b = S.index s 1
          rest = S.drop 2 s
      in case mergeNodePair a b of
        Just m -> mergeNodes $ m S.<| rest
        Nothing -> case mergeNodes $ b S.<| rest of
            Left l -> Right $ S.fromList [a, l]
            Right ls -> Right $ a S.<| ls

sInit :: S.Seq a -> S.Seq a
sInit s = S.take ((S.length s) - 1) s

sHead :: S.Seq a -> a
sHead s = S.index s 0

sTail :: S.Seq a -> S.Seq a
sTail = S.drop 1

sLast :: S.Seq a -> a
sLast s = S.index s ((S.length s) - 1)

mergeNodePair :: (Eq a) => SequenceTree a -> SequenceTree a -> Maybe (SequenceTree a)
mergeNodePair (Leaf aStart aLen aVal) (Leaf bStart bLen bVal)
  | aVal == bVal && bStart == aStart + aLen = Just $ Leaf aStart (aLen + bLen) aVal
  | otherwise = Nothing
mergeNodePair leaf@(Leaf aStart aLen _) (Node _ bLen bs) = do
    merged <- mergeNodePair leaf $ sHead bs
    case mergeNodes $ merged S.<| (sTail bs) of
        Left single -> return single
        Right many -> return $ Node aStart (aStart + aLen + bLen) many
mergeNodePair (Node aStart aLen as) leaf@(Leaf _ bLen _)
  | length as > 0 = do
    merged <- mergeNodePair (sLast as) leaf
    case mergeNodes $ sInit as <> S.singleton merged of
        Left single -> return single
        Right many -> return $ Node aStart (aStart + aLen + bLen) many
mergeNodePair (Node aStart aLen as) (Node _ bLen bs)
  | length as > 0 && length bs > 0 = do
    merged <- mergeNodePair (sLast as) (sHead bs)
    case mergeNodes $ sInit as <> S.singleton merged <> sTail bs of
        Left single -> return single
        Right many -> return $ Node aStart (aStart + aLen + bLen) many
mergeNodePair _ _ = Nothing

leaves :: SequenceTree a -> [(Int, Int, a)]
leaves (Leaf st len a) = [(st, len, a)]
leaves (Node _ _ cs) = concat $ leaves <$> cs