-- | A data structure for a static forest. module Data.Forest.Static where import Control.Applicative ((<$>),(<*>)) import Control.Monad (replicateM) import Data.Foldable (toList) import Data.Graph.Inductive.Basic import Data.List (span,uncons,sort) import Data.Traversable (mapAccumL) import Data.Tree (Tree) import Debug.Trace import qualified Data.List as L import qualified Data.Map.Strict as S import qualified Data.Set as Set import qualified Data.Tree as T import qualified Data.Vector as V import qualified Data.Vector.Generic as VG import qualified Data.Vector.Unboxed as VU import Test.QuickCheck -- | Kind of possible @TreeOrder@s. -- -- TODO @In@ for in-order traversal? -- -- TODO @Unordered@ for trees that have no sorted order? data TreeOrder = Pre | Post | Unordered -- | A static forest structure. While traversals are always explicitly -- possible by following the indices, the nodes themselves shall always be -- ordered by the type @p :: TreeOrder@. This is not completely enforced, -- given that @Forest@ is exporting the constructor, but encouraged via -- construction with helper functions. The labels of type @a@ (in @label@) -- require a vector structure @v@ for @O(1)@ access. data Forest (p :: TreeOrder) v a where Forest :: (VG.Vector v a) => { label :: v a -- ^ Each node @k@ in @[0..n-1]@ has a label at @label ! k@. , parent :: VU.Vector Int -- ^ Each node @k@ has a parent node, or @-1@ if there is no such -- parent. , children :: V.Vector (VU.Vector Int) -- ^ Each node @k@ has a vector of indices for its children. For leaf -- nodes, the vector is empty. , lsib :: VU.Vector Int -- ^ The left sibling for a node @k@. Will *not* cross subtrees. I.e. -- if @k@ is @lsib@ of @l@, then @k@ and @l@ have the same parent. , rsib :: VU.Vector Int -- ^ The right sibling for a node @k@. , roots :: VU.Vector Int -- ^ The roots of the individual trees, the forest was constructed -- from. } -> Forest p v a deriving instance (Show a, Show (v a)) => Show (Forest p v a) -- | Construct a static 'Forest' with a tree traversal function. I.e. -- @forestWith preorderF trees@ will construct a pre-order forest from the -- list of @trees@. -- -- Siblings span trees in the forest! forestWith :: (VG.Vector v a) => (forall a . [T.Tree a] -> [a]) -> [T.Tree a] -> Forest (p::TreeOrder) v a forestWith f ts = Forest { label = VG.fromList $ f ts , parent = VU.fromList $ map (\(_,k,_ ,_) -> k ) $ f pcs , children = V.fromList $ map (\(_,_,cs,_) -> VU.fromList cs) $ f pcs , lsib = VU.fromList $ map fst $ S.elems lr , rsib = VU.fromList $ map snd $ S.elems lr , roots = VU.fromList $ map (fst . T.rootLabel) us } where -- Step 1: construct a forest isomorphic to @ts@ but labelled with -- a total order of unique identifiers. (That is: label with @Int@s). -- The unique identifiers are in pre-order. ps = addIndicesF' 0 ts -- Step 2: use @f@ to produce a permutation map and apply this -- permutation to turn the pre-order @ps@ into the required order. backp = VU.fromList $ map snd $ sort $ zip (f ps) [0..] -- Step 3: decorate the forest with indices in the correct order. Keep -- the label in @snd@. us = map (fmap (\(k,l) -> (backp VG.! k,l))) $ addIndicesF 0 ts -- Step 4: add the correct relations (children, lrSibling, parents) pcs = parentChildrenF (-1) us -- A map with the left and right sibling lr = lrSiblingF us -- | Construct a pre-ordered forest. forestPre :: (VG.Vector v a) => [T.Tree a] -> Forest Pre v a forestPre = forestWith preorderF -- | Construct a post-ordered forest. forestPost :: (VG.Vector v a) => [T.Tree a] -> Forest Post v a forestPost = forestWith postorderF -- | Add @pre-ordered@ @(!)@ indices. First argument is the starting index. addIndices :: Int -> T.Tree a -> T.Tree (Int,a) addIndices k = snd . mapAccumL (\i e -> (i+1, (i,e))) k -- | Add @pre-ordered@ @(!)@ indices, but to a forest. addIndicesF :: Int -> [T.Tree a] -> [T.Tree (Int,a)] addIndicesF k = snd . mapAccumL go k where go = mapAccumL (\i e -> (i+1, (i,e))) -- | Add @pre-ordered@ @(!)@ indices to a forest, but throw the label away as -- well. addIndicesF' :: Int -> [T.Tree a] -> [T.Tree Int] addIndicesF' k = snd . mapAccumL go k where go = mapAccumL (\i e -> (i+1, i)) -- | Add parent + children information. Yields -- @(Index,Parent,[Child],Label)@. Parent is @-1@ if root node. parentChildrenF :: Int -> [T.Tree (Int,a)] -> [T.Tree (Int,Int,[Int],a)] parentChildrenF k ts = [ T.Node (i,k,children sf,l) (parentChildrenF i sf) | T.Node (i,l) sf <- ts ] where children sf = map (fst . T.rootLabel) sf -- | Return a map with all the nearest siblings for each node, for a forest. lrSiblingF :: [T.Tree (Int,a)] -> S.Map Int (Int,Int) lrSiblingF = S.delete (-1) . lrSibling . T.Node (-1,error "laziness in lrSiblingF broken") -- | Return a map with all the nearest siblings for each node, for a tree. lrSibling :: T.Tree (Int,a) -> S.Map Int (Int,Int) lrSibling = S.fromList . map splt . T.flatten . go ([]::[Int]) where go sib (T.Node (k,lbl) frst) = let cs = [l | T.Node (l,_) _ <- frst] in T.Node (k,lbl,sib) [ go cs t | t <- frst] splt (k,_,[]) = (k,(-1,-1)) splt (k,_,sbl) = let (ls,rs) = span (/=k) sbl in (k,(last $ (-1):ls,head $ tail rs ++ [-1])) -- | Return the left-most leaf for each node. leftMostLeaves :: Forest p v a -> VU.Vector Int leftMostLeaves f = VG.map (leftMostLeaf f) $ VG.enumFromN 0 $ VG.length $ parent f -- | Just the leaf-most leaf for a certain node. leftMostLeaf :: Forest p v a -> Int -> Int leftMostLeaf f = go where go k = let cs = children f VG.! k in if VG.null cs then k else go (VG.head cs) -- | Return the right-most leaf for each node. rightMostLeaves :: Forest p v a -> VU.Vector Int rightMostLeaves f = VG.map (rightMostLeaf f) $ VG.enumFromN 0 $ VG.length $ parent f -- | Given a tree, and a node index, return the right-most leaf for the -- node. rightMostLeaf :: Forest p v a -> Int -> Int rightMostLeaf f = go where go k = let cs = children f VG.! k in if VG.null cs then k else go (VG.last cs) -- | Return all left key roots. These are the nodes that have no (super-) -- parent with the same left-most leaf. -- -- This function is somewhat specialized for tree editing. -- -- TODO group by leftKeyRoots :: Forest Post v a -> VU.Vector Int leftKeyRoots f = VU.fromList . sort . S.elems $ VU.foldl' go S.empty (VU.enumFromN (0::Int) $ VG.length $ parent f) -- Build a map from left-most leaf to most root-near node. where go s k = S.insertWith max (lml VU.! k) k s lml = leftMostLeaves f -- | Returns the list of all sorted subsets of subforests in the forest. -- If the forest is given in pre-order, then The subsets are returned in -- reversed pre-order. -- -- TODO turn this into @newtype vectors@ that enforce @size >= 1@. sortedSubForests :: Forest p v a -> [VU.Vector Int] sortedSubForests f = -- cleanup map VU.fromList . L.nub -- TODO revise later, is in @O(n^2)@ . concat -- make sure that in our partial order we have smaller forests come -- first. . map (map unSrt . Set.toList . Set.fromList . map Srt) -- get all nonempty ordered subforests . map (concatMap (L.tail . L.subsequences)) . map (L.permutations) . map VG.toList . VG.toList -- only nodes with children . VG.filter (not . VG.null) -- every node that has children in reverse order -- make sure that the roots are there, but come last $ VG.snoc (VG.reverse (children f)) (roots f) newtype Srt = Srt { unSrt :: [Int] } deriving (Eq,Show) instance Ord Srt where Srt xs <= Srt ys = length xs <= length ys -- | Given a forest, return the list of trees that constitue the forest. forestToTrees :: Forest p v a -> T.Forest a forestToTrees Forest{..} = map getTree . VG.toList $ roots where getTree k = T.Node (label VG.! k) (map getTree . VG.toList $ children VG.! k) -- * QuickCheck -- | Wrapped quickcheck instance for 'T.Tree'. newtype QCTree a = QCTree { getTree :: T.Tree a } deriving (Show) instance (Arbitrary a) => Arbitrary (QCTree a) where arbitrary = let go = sized $ \n -> do val <- arbitrary let n' = n `div` 2 nodes <- if n' > 0 then do k <- choose (0,n') resize n' $ replicateM k (getTree <$> arbitrary) else return [] return $ T.Node val nodes in QCTree <$> go shrink (QCTree (T.Node val forest)) = [] -- [ QCTree $ T.Node v f | v <- shrink val, f <- map (map getTree) $ shrink $ map QCTree forest ] -- * Test functions test1 :: [T.Tree Char] test1 = [T.Node 'R' [T.Node 'a' [], T.Node 'b' []], T.Node 'S' [T.Node 'x' [], T.Node 'y' []]] test2 :: [T.Tree Char] test2 = [T.Node 'R' [T.Node 'a' [], T.Node 'b' [], T.Node 'c' []]] runtest t = do print (forestPre t :: Forest Pre V.Vector Char) print (forestPost t :: Forest Post V.Vector Char) print (forestPost [T.Node 'R' [T.Node 'a' []]] :: Forest Post V.Vector Char) print (forestPost [T.Node 'R' [T.Node 'a' [], T.Node 'b' []]] :: Forest Post V.Vector Char) print (sortedSubForests (forestPre t :: Forest Pre V.Vector Char))