{-# LANGUAGE DeriveTraversable, EmptyDataDeriving, FlexibleInstances, StandaloneDeriving, StrictData, UndecidableInstances #-} module Little.Earley.Internal.Tree where import Control.Monad (guard) import Control.Applicative (Alternative(..), liftA2) import Data.Function (on) import Data.List (groupBy, sortBy) import Data.Maybe (fromMaybe) import Data.Ord (comparing) import Data.Sequence (Seq) import qualified Data.Sequence as Seq import Data.Set (Set) import qualified Data.Set as Set import Little.Earley.Internal.Core -- | Generalized parse tree. -- -- A basic parse tree ('Tree') consists of leaves labeled terminal symbols @t@ ('Leaf') -- and nodes labeled with grammar rules associated to nonterminal symbols (@('Brch')@). -- -- Other variants of parse trees ('TreeSet', 'TruncatedTreeSet') can be represented -- using extension nodes ('Ext'). -- -- Trees may be infinite due to an input string matching infinitely many parse trees. -- Note that even though @StrictData@ is enabled, we get laziness via the list type @[]@ -- and tuple type @(,)@. data TreeT f n t c = Leaf Int t c -- ^ The @Int@ field is the position of the token in the input. | Brch (RuleId n) Int Int [TreeT f n t c] -- ^ The @Int@ fields are the endpoints of this subtree in the input. | Ext (f (TreeT f n t c)) deriving instance (Eq n, Eq t, Eq c, Eq (f (TreeT f n t c))) => Eq (TreeT f n t c) deriving instance (Show n, Show t, Show c, Show (f (TreeT f n t c))) => Show (TreeT f n t c) -- | Basic parse tree. type Tree = TreeT NoExt -- | A set of 'Tree', using a compact encoding. type TreeSet = TreeT Choice -- | Result of 'truncateTree' applied to a 'Tree'. type TruncatedTree = TreeT (Sum Ellipsis NoExt) -- | Result of 'truncateTree' applied to a 'TreeSet'. type TruncatedTreeSet = TreeT (Sum Ellipsis Choice) -- * Functors for extending @TreeT@ -- | No extensions. data NoExt a deriving (Eq, Show, Functor) -- | Choice constructor to represent 'TreeSet'. data Choice a = a :|: a deriving (Eq, Show, Functor) -- | Overloaded version of @(':|:')@. class HasChoice f where (.:|:) :: a -> a -> f a instance HasChoice Choice where (.:|:) = (:|:) instance HasChoice (Sum f Choice) where x .:|: y = InR (x :|: y) -- | Construct the disjunction of two trees featuring the 'Choice' functor. (|:) :: HasChoice f => TreeT f n t c -> TreeT f n t c -> TreeT f n t c u |: u' = Ext (u .:|: u') -- | Ellided by 'truncateTree'. data Ellipsis a = Ellipsis deriving (Eq, Show, Functor) -- | Empty tree. ellipsis :: TreeT (Sum Ellipsis f) n t c ellipsis = Ext (InL Ellipsis) -- | Like 'Data.Functor.Sum.Sum' from "Data.Functor.Sum" but with more basic instances data Sum f g a = InL (f a) | InR (g a) deriving (Eq, Show, Functor) infixr 1 :|:, |: -- | Parse a chain of tokens @[c]@ into a parse tree. -- Simplified variant of 'Little.Earley.parse'. parseTreeSet :: (Ord n, Ord t) => Grammar n t c -> n -> [c] -> Maybe (TreeSet n t c) parseTreeSet g n = snd . parseTreeSet_ g n parseTreeSet_ :: (Ord n, Ord t) => Grammar n t c -> n -> [c] -> (Seq1 (Set (Item n t)), Maybe (TreeSet n t c)) parseTreeSet_ g n cs = let hs = preparse g n cs in (hs, growTree g (Seq.fromList cs) hs n 0 (endOfSeq1 hs)) endOfSeq1 :: Seq1 a -> Int endOfSeq1 (xs ::> _) = Seq.length xs lookupSeq1 :: Int -> Seq1 a -> a lookupSeq1 i (xs ::> x) | i == Seq.length xs = x | otherwise = Seq.index xs i growTree :: (Ord n, Ord t) => Grammar n t c -> Seq c -> Seq1 (Set (Item n t)) -> n -> Int -> Int -> Maybe (TreeSet n t c) growTree g cs hs n i0 i1 = unionTrees (do Item ri _ i' <- matchItems hs n i1 guard (i' == i0) growBranches g cs hs ri i0 i1) matchItems :: (Ord n, Ord t) => Seq1 (Set (Item n t)) -> n -> Int -> [Item n t] matchItems hs n i1 = [h | h@(Item (RuleId n' _) [] _) <- Set.toList (lookupSeq1 i1 hs), n' == n] unionTrees :: [TreeSet n t c] -> Maybe (TreeSet n t c) unionTrees (t : ts) = case unionTrees ts of Nothing -> Just t Just t' -> Just (t |: t') unionTrees [] = Nothing growBranches :: (Ord n, Ord t) => Grammar n t c -> Seq c -> Seq1 (Set (Item n t)) -> RuleId n -> Int -> Int -> [TreeSet n t c] growBranches g cs hs ri@(RuleId n j) i0 i1 = Brch ri i0 i1 <$> growBranchesFrom g cs hs i0 i1 (reverse (rules g n !! j)) [] sortAndGroupBy :: Ord b => (a -> b) -> [a] -> [[a]] sortAndGroupBy f = groupBy ((==) `on` f) . sortBy (comparing f) growBranchesFrom :: (Ord n, Ord t) => Grammar n t c -> Seq c -> Seq1 (Set (Item n t)) -> Int -> Int -> [Atom n t] -> [TreeSet n t c] -> [[TreeSet n t c]] growBranchesFrom _g _cs _hs i0 i1 _ _acc | i1 < i0 = [] growBranchesFrom _g _cs _hs i0 i1 [] acc | i0 == i1 = [acc] | otherwise = [] growBranchesFrom g cs hs i0 i1 (T t : xs) acc = growBranchesFrom g cs hs i0 (i1 - 1) xs (Leaf (i1 - 1) t (Seq.index cs (i1 - 1)) : acc) growBranchesFrom g cs hs i0 i1 (N n : xs) acc = do let begin (Item _ _ i') = i' grp@(Item _ _ i' : _) <- sortAndGroupBy begin (matchItems hs n i1) -- This @u@ might be an infinite loop caused by right recursion, -- but it would end up being discarded in the recursive call to @growBranchesFrom@, -- so this is fine. let brch = grp >>= \(Item ri _ _i') -> growBranches g cs hs ri i' i1 u = fromMaybe (error "Should not happen") (unionTrees brch) growBranchesFrom g cs hs i0 i' xs (u : acc) -- | Truncate a tree to finite depth. -- -- @ -- 'truncateTree' :: Int -> 'TreeSet' n t c -> 'TruncatedTreeSet' n t c -- 'truncateTree' :: Int -> 'Tree' n t c -> 'TruncatedTree' n t c -- @ truncateTree :: Functor f => Int -> TreeT f n t c -> TreeT (Sum Ellipsis f) n t c truncateTree 0 _ = ellipsis truncateTree _ (Leaf i t c) = Leaf i t c truncateTree i (Brch n i0 i1 us) = Brch n i0 i1 (fmap (truncateTree (i-1)) us) truncateTree i (Ext ext) = Ext (InR (truncateTree (i-1) <$> ext)) -- | Return 'Just' if the given 'TreeSet' represents a single 'Tree', -- 'Nothing' otherwise (ambiguous parse tree). fromSingleton :: TreeSet n t c -> Maybe (Tree n t c) fromSingleton (Leaf i t c) = Just (Leaf i t c) fromSingleton (Brch n i0 i1 us) = Brch n i0 i1 <$> traverse fromSingleton us fromSingleton (Ext (_ :|: _)) = Nothing -- @arbTree@: Get an arbitrary @Tree@ from a @TreeSet@. -- This is not completely straightforward in the presence of infinitary -- ambiguity. To ensure termination, we perform a kind of BFS; this is done -- implicitly via the use of the @Lazy@ monad (aka. the delay monad). data Lazy a = Now a | Later ~(Lazy a) deriving Functor instance Applicative Lazy where pure = Now liftA2 f (Now x) y = fmap (f x) y liftA2 f x (Now y) = fmap (\x0 -> f x0 y) x liftA2 f (Later x) (Later y) = Later (liftA2 f x y) instance Alternative Lazy where empty = Later empty Later x <|> Later y = Later (x <|> y) Now x <|> _ = Now x _ <|> Now y = Now y forceLazy :: Lazy a -> a forceLazy (Now x) = x forceLazy (Later x) = forceLazy x -- | Get an arbitrary @Tree@ from a @TreeSet@, even if it is ambiguous. arbTree :: TreeSet n t c -> Tree n t c arbTree = forceLazy . arbTree_ arbTree_ :: TreeSet n t c -> Lazy (Tree n t c) arbTree_ (Leaf i t c) = pure (Leaf i t c) arbTree_ (Brch n i0 i1 us) = Brch n i0 i1 <$> traverse arbTree_ us arbTree_ (Ext (u :|: u')) = Later (arbTree_ u <|> arbTree_ u') -- @ambiguities@: enumerate (some) ambiguous parses. -- When an input is ambiguous, the ambiguity can often be minimized -- to a relatively small fragment of the input. -- However since a @TreeSet@ is potentially infinite, I'm not sure -- how to find a "smallest" ambiguous subtree (containing a @Choice@ node). -- Instead, we enumerate them all, so we can then stop the unbounded search at -- an arbitrary point. -- | An interval in some input sequence. data Range = Range { rangeLen :: Int -- ^ Length. , rangePos :: Int -- ^ Starting position. } deriving (Eq, Ord, Show) -- | Construt a range from its end points. mkRange :: Int -> Int -> Range mkRange i0 i1 = Range (i1 - i0) i0 treeRange :: Tree n t c -> Range treeRange t = mkRange (leftEnd t) (rightEnd t) leftEnd, rightEnd :: Tree n t c -> Int leftEnd (Leaf i _ _) = i leftEnd (Brch _ i0 _i1 _) = i0 rightEnd (Leaf i _ _) = i rightEnd (Brch _ _i0 i1 _) = i1 -- | Evidence of ambiguity: two parse trees for the same input. data Ambiguity n t c = Ambiguity (Tree n t c) (Tree n t c) deriving (Show) -- | This instance treats @Ambiguity@ as an unordered pair. instance (Eq n, Eq t, Eq c) => Eq (Ambiguity n t c) where Ambiguity x1 x2 == Ambiguity y1 y2 = (x1, x2) == (y1, y2) || (x1, x2) == (y2, y1) -- | Ambiguity at a given location. type LocAmbiguity n t c = (Range, Ambiguity n t c) -- | Interleave two lists together. -- This combines enumerations somewhat fairly. (><) :: [a] -> [a] -> [a] (x : xs) >< ys = x : (ys >< xs) [] >< ys = ys -- | Enumerate (some) ambiguous parses. -- -- If there are multiple ambiguities at the same location, we just pick -- an arbitrary example. ambiguities :: TreeSet n t c -> [LocAmbiguity n t c] ambiguities (Leaf _ _ _) = [] ambiguities (Brch _ _ _ us) = go us where go [] = [] go (u : us1) = ambiguities u >< go us1 ambiguities (Ext (u :|: u')) = let au = arbTree u ambig = (treeRange au, Ambiguity au (arbTree u')) in ambig : (ambiguities' u >< ambiguities' u') -- Skip ambiguities at the same location. ambiguities' :: TreeSet n t c -> [LocAmbiguity n t c] ambiguities' (Ext (u :|: u')) = ambiguities' u >< ambiguities' u' ambiguities' u = ambiguities u