{-# LANGUAGE
  DeriveTraversable,
  EmptyDataDeriving,
  FlexibleInstances,
  StandaloneDeriving,
  StrictData,
  UndecidableInstances #-}

module Little.Earley.Internal.Tree where

import Control.Monad (guard)
import Control.Applicative (Alternative(..), liftA2)
import Data.Function (on)
import Data.List (groupBy, sortBy)
import Data.Maybe (fromMaybe)
import Data.Ord (comparing)

import Data.Sequence (Seq)
import qualified Data.Sequence as Seq
import Data.Set (Set)
import qualified Data.Set as Set

import Little.Earley.Internal.Core

-- | Generalized parse tree.
--
-- A basic parse tree ('Tree') consists of leaves labeled terminal symbols @t@ ('Leaf')
-- and nodes labeled with grammar rules associated to nonterminal symbols (@('Brch')@).
--
-- Other variants of parse trees ('TreeSet', 'TruncatedTreeSet') can be represented
-- using extension nodes ('Ext').
--
-- Trees may be infinite due to an input string matching infinitely many parse trees.
-- Note that even though @StrictData@ is enabled, we get laziness via the list type @[]@
-- and tuple type @(,)@.
data TreeT f n t c
  = Leaf Int t c  -- ^ The @Int@ field is the position of the token in the input.
  | Brch (RuleId n) Int Int [TreeT f n t c]
    -- ^ The @Int@ fields are the endpoints of this subtree in the input.
  | Ext (f (TreeT f n t c))

deriving instance (Eq n, Eq t, Eq c, Eq (f (TreeT f n t c))) => Eq (TreeT f n t c)
deriving instance (Show n, Show t, Show c, Show (f (TreeT f n t c))) => Show (TreeT f n t c)

-- | Basic parse tree.
type Tree = TreeT NoExt

-- | A set of 'Tree', using a compact encoding.
type TreeSet = TreeT Choice

-- | Result of 'truncateTree' applied to a 'Tree'.
type TruncatedTree = TreeT (Sum Ellipsis NoExt)

-- | Result of 'truncateTree' applied to a 'TreeSet'.
type TruncatedTreeSet = TreeT (Sum Ellipsis Choice)

-- * Functors for extending @TreeT@

-- | No extensions.
data NoExt a
  deriving (Eq, Show, Functor)

-- | Choice constructor to represent 'TreeSet'.
data Choice a = a :|: a
  deriving (Eq, Show, Functor)

-- | Overloaded version of @(':|:')@.
class HasChoice f where
  (.:|:) :: a -> a -> f a

instance HasChoice Choice where
  (.:|:) = (:|:)

instance HasChoice (Sum f Choice) where
  x .:|: y = InR (x :|: y)

-- | Construct the disjunction of two trees featuring the 'Choice' functor.
(|:) :: HasChoice f => TreeT f n t c -> TreeT f n t c -> TreeT f n t c
u |: u' = Ext (u .:|: u')

-- | Ellided by 'truncateTree'.
data Ellipsis a = Ellipsis
  deriving (Eq, Show, Functor)

-- | Empty tree.
ellipsis :: TreeT (Sum Ellipsis f) n t c
ellipsis = Ext (InL Ellipsis)

-- | Like 'Data.Functor.Sum.Sum' from "Data.Functor.Sum" but with more basic instances
data Sum f g a = InL (f a) | InR (g a)
  deriving (Eq, Show, Functor)

infixr 1 :|:, |:

-- | Parse a chain of tokens @[c]@ into a parse tree.
-- Simplified variant of 'Little.Earley.parse'.
parseTreeSet :: (Ord n, Ord t) => Grammar n t c -> n -> [c] -> Maybe (TreeSet n t c)
parseTreeSet g n = snd . parseTreeSet_ g n

parseTreeSet_ ::
  (Ord n, Ord t) =>
  Grammar n t c -> n -> [c] -> (Seq1 (Set (Item n t)), Maybe (TreeSet n t c))
parseTreeSet_ g n cs =
  let hs = preparse g n cs in
  (hs, growTree g (Seq.fromList cs) hs n 0 (endOfSeq1 hs))

endOfSeq1 :: Seq1 a -> Int
endOfSeq1 (xs ::> _) = Seq.length xs

lookupSeq1 :: Int -> Seq1 a -> a
lookupSeq1 i (xs ::> x)
  | i == Seq.length xs = x
  | otherwise = Seq.index xs i

growTree ::
  (Ord n, Ord t) =>
  Grammar n t c -> Seq c -> Seq1 (Set (Item n t)) -> n -> Int -> Int -> Maybe (TreeSet n t c)
growTree g cs hs n i0 i1 =
  unionTrees (do
    Item ri _ i' <- matchItems hs n i1
    guard (i' == i0)
    growBranches g cs hs ri i0 i1)

matchItems :: (Ord n, Ord t) => Seq1 (Set (Item n t)) -> n -> Int -> [Item n t]
matchItems hs n i1 =
  [h | h@(Item (RuleId n' _) [] _) <- Set.toList (lookupSeq1 i1 hs), n' == n]

unionTrees :: [TreeSet n t c] -> Maybe (TreeSet n t c)
unionTrees (t : ts) = case unionTrees ts of
  Nothing -> Just t
  Just t' -> Just (t |: t')
unionTrees [] = Nothing

growBranches ::
  (Ord n, Ord t) =>
  Grammar n t c -> Seq c -> Seq1 (Set (Item n t)) -> RuleId n -> Int -> Int -> [TreeSet n t c]
growBranches g cs hs ri@(RuleId n j) i0 i1 =
  Brch ri i0 i1 <$> growBranchesFrom g cs hs i0 i1 (reverse (rules g n !! j)) []

sortAndGroupBy :: Ord b => (a -> b) -> [a] -> [[a]]
sortAndGroupBy f = groupBy ((==) `on` f) . sortBy (comparing f)

growBranchesFrom ::
  (Ord n, Ord t) =>
  Grammar n t c -> Seq c -> Seq1 (Set (Item n t)) ->
  Int -> Int -> [Atom n t] -> [TreeSet n t c] -> [[TreeSet n t c]]
growBranchesFrom _g _cs _hs i0 i1 _ _acc | i1 < i0 = []
growBranchesFrom _g _cs _hs i0 i1 [] acc
  | i0 == i1 = [acc]
  | otherwise = []
growBranchesFrom g cs hs i0 i1 (T t : xs) acc =
  growBranchesFrom g cs hs i0 (i1 - 1) xs (Leaf (i1 - 1) t (Seq.index cs (i1 - 1)) : acc)
growBranchesFrom g cs hs i0 i1 (N n : xs) acc = do
  let begin (Item _ _ i') = i'
  grp@(Item _ _ i' : _) <- sortAndGroupBy begin (matchItems hs n i1)
  -- This @u@ might be an infinite loop caused by right recursion,
  -- but it would end up being discarded in the recursive call to @growBranchesFrom@,
  -- so this is fine.
  let brch = grp >>= \(Item ri _ _i') -> growBranches g cs hs ri i' i1
      u = fromMaybe (error "Should not happen") (unionTrees brch)
  growBranchesFrom g cs hs i0 i' xs (u : acc)

-- | Truncate a tree to finite depth.
--
-- @
-- 'truncateTree' :: Int -> 'TreeSet' n t c -> 'TruncatedTreeSet' n t c
-- 'truncateTree' :: Int -> 'Tree' n t c -> 'TruncatedTree' n t c
-- @
truncateTree :: Functor f => Int -> TreeT f n t c -> TreeT (Sum Ellipsis f) n t c
truncateTree 0 _ = ellipsis
truncateTree _ (Leaf i t c) = Leaf i t c
truncateTree i (Brch n i0 i1 us) = Brch n i0 i1 (fmap (truncateTree (i-1)) us)
truncateTree i (Ext ext) = Ext (InR (truncateTree (i-1) <$> ext))

-- | Return 'Just' if the given 'TreeSet' represents a single 'Tree',
-- 'Nothing' otherwise (ambiguous parse tree).
fromSingleton :: TreeSet n t c -> Maybe (Tree n t c)
fromSingleton (Leaf i t c) = Just (Leaf i t c)
fromSingleton (Brch n i0 i1 us) = Brch n i0 i1 <$> traverse fromSingleton us
fromSingleton (Ext (_ :|: _)) = Nothing

-- @arbTree@: Get an arbitrary @Tree@ from a @TreeSet@.
-- This is not completely straightforward in the presence of infinitary
-- ambiguity. To ensure termination, we perform a kind of BFS; this is done
-- implicitly via the use of the @Lazy@ monad (aka. the delay monad).

data Lazy a = Now a | Later ~(Lazy a)
  deriving Functor

instance Applicative Lazy where
  pure = Now
  liftA2 f (Now x) y = fmap (f x) y
  liftA2 f x (Now y) = fmap (\x0 -> f x0 y) x
  liftA2 f (Later x) (Later y) = Later (liftA2 f x y)

instance Alternative Lazy where
  empty = Later empty
  Later x <|> Later y = Later (x <|> y)
  Now x <|> _ = Now x
  _ <|> Now y = Now y

forceLazy :: Lazy a -> a
forceLazy (Now x) = x
forceLazy (Later x) = forceLazy x

-- | Get an arbitrary @Tree@ from a @TreeSet@, even if it is ambiguous.
arbTree :: TreeSet n t c -> Tree n t c
arbTree = forceLazy . arbTree_

arbTree_ :: TreeSet n t c -> Lazy (Tree n t c)
arbTree_ (Leaf i t c) = pure (Leaf i t c)
arbTree_ (Brch n i0 i1 us) = Brch n i0 i1 <$> traverse arbTree_ us
arbTree_ (Ext (u :|: u')) = Later (arbTree_ u <|> arbTree_ u')

-- @ambiguities@: enumerate (some) ambiguous parses.
-- When an input is ambiguous, the ambiguity can often be minimized
-- to a relatively small fragment of the input.
-- However since a @TreeSet@ is potentially infinite, I'm not sure
-- how to find a "smallest" ambiguous subtree (containing a @Choice@ node).
-- Instead, we enumerate them all, so we can then stop the unbounded search at
-- an arbitrary point.

-- | An interval in some input sequence.
data Range = Range
  { rangeLen :: Int  -- ^ Length.
  , rangePos :: Int  -- ^ Starting position.
  } deriving (Eq, Ord, Show)

-- | Construt a range from its end points.
mkRange :: Int -> Int -> Range
mkRange i0 i1 = Range (i1 - i0) i0

treeRange :: Tree n t c -> Range
treeRange t = mkRange (leftEnd t) (rightEnd t)

leftEnd, rightEnd :: Tree n t c -> Int
leftEnd (Leaf i _ _) = i
leftEnd (Brch _ i0 _i1 _) = i0

rightEnd (Leaf i _ _) = i
rightEnd (Brch _ _i0 i1 _) = i1

-- | Evidence of ambiguity: two parse trees for the same input.
data Ambiguity n t c = Ambiguity (Tree n t c) (Tree n t c)
  deriving (Show)

-- | This instance treats @Ambiguity@ as an unordered pair.
instance (Eq n, Eq t, Eq c) => Eq (Ambiguity n t c) where
  Ambiguity x1 x2 == Ambiguity y1 y2 =
    (x1, x2) == (y1, y2) || (x1, x2) == (y2, y1)

-- | Ambiguity at a given location.
type LocAmbiguity n t c = (Range, Ambiguity n t c)

-- | Interleave two lists together.
-- This combines enumerations somewhat fairly.
(><) :: [a] -> [a] -> [a]
(x : xs) >< ys = x : (ys >< xs)
[] >< ys = ys

-- | Enumerate (some) ambiguous parses.
--
-- If there are multiple ambiguities at the same location, we just pick
-- an arbitrary example.
ambiguities :: TreeSet n t c -> [LocAmbiguity n t c]
ambiguities (Leaf _ _ _) = []
ambiguities (Brch _ _ _ us) = go us where
  go [] = []
  go (u : us1) = ambiguities u >< go us1
ambiguities (Ext (u :|: u')) =
  let au = arbTree u
      ambig = (treeRange au, Ambiguity au (arbTree u')) in
  ambig : (ambiguities' u >< ambiguities' u')

-- Skip ambiguities at the same location.
ambiguities' :: TreeSet n t c -> [LocAmbiguity n t c]
ambiguities' (Ext (u :|: u')) = ambiguities' u >< ambiguities' u'
ambiguities' u = ambiguities u