-- Hoogle documentation, generated by Haddock -- See Hoogle, http://www.haskell.org/hoogle/ -- | Efficient, lazy suffix tree implementation -- -- An efficient, lazy suffix tree implementation. @package suffixtree @version 0.2.2 -- | A lazy, efficient suffix tree implementation. -- -- Since many function names (but not the type name) clash with -- Prelude names, this module is usually imported -- qualified, e.g. -- --
--   import Data.SuffixTree (STree)
--   import qualified Data.SuffixTree as T
--   
-- -- The implementation is based on the first of those described in the -- following paper: -- -- -- -- This implementation constructs the suffix tree lazily, so subtrees are -- not created until they are traversed. Two construction functions are -- provided, constructWith for sequences composed of small -- alphabets, and construct for larger alphabets. -- -- Estimates are given for performance. The value k is a constant; -- n is the length of a query string; and t is the number -- of elements (nodes and leaves) in a suffix tree. module Data.SuffixTree -- | The list of symbols that constructWith can possibly see in its -- input. type Alphabet a = [a] -- | An edge in the suffix tree. type Edge a = (Prefix a, STree a) -- | The prefix string associated with an Edge. Use mkPrefix -- to create a value of this type, and prefix to deconstruct one. data Prefix a -- | The suffix tree type. The implementation is exposed to ease the -- development of custom traversal functions. Note that -- (Prefix a, STree a) pairs are not stored in any -- order. data STree a Node :: [Edge a] -> STree a Leaf :: STree a -- | O(k n log n). Constructs a suffix tree using the given -- alphabet. The performance of this function is linear in the size -- k of the alphabet. That makes this function suitable for small -- alphabets, such as DNA nucleotides. For an alphabet containing more -- than a few symbols, construct is usually several orders of -- magnitude faster. constructWith :: Eq a => Alphabet a -> [a] -> STree a -- | O(n log n). Constructs a suffix tree. construct :: Ord a => [a] -> STree a -- | O(n). Indicates whether the suffix tree contains the given -- sublist. Performance is linear in the length n of the sublist. elem :: Eq a => [a] -> STree a -> Bool -- | O(n). Finds the given subsequence in the suffix tree. On -- failure, returns Nothing. On success, returns the Edge -- in the suffix tree at which the subsequence ends, along with the -- number of elements to drop from the prefix of the Edge to get -- the "real" remaining prefix. -- -- Here is an example: -- --
--   > find "ssip" (construct "mississippi")
--   Just ((mkPrefix "ppi",Leaf),1)
--   
-- -- This indicates that the edge (mkPrefix -- "ppi",Leaf) matches, and that we must strip 1 character -- from the string "ppi" to get the remaining prefix string -- "pi". -- -- Performance is linear in the length n of the query list. findEdge :: Eq a => [a] -> STree a -> Maybe (Edge a, Int) -- | O(n). Finds the subtree rooted at the end of the given query -- sequence. On failure, returns Nothing. -- -- Performance is linear in the length n of the query list. findTree :: Eq a => [a] -> STree a -> Maybe (STree a) -- | O(n). Returns the path from the Edge in the suffix tree -- at which the given subsequence ends, up to the root of the tree. If -- the subsequence is not found, returns the empty list. -- -- Performance is linear in the length of the query list. findPath :: Eq a => [a] -> STree a -> [Edge a] -- | O(t). Count the number of leaves in a tree. -- -- Performance is linear in the number t of elements in the tree. countLeaves :: STree a -> Int -- | O(n + r). Count the number of times a sequence is repeated in -- the input sequence that was used to construct the suffix tree. -- -- Performance is linear in the length n of the input sequence, -- plus the number of times r the sequence is repeated. countRepeats :: Eq a => [a] -> STree a -> Int -- | O(t). Folds the edges in a tree, using post-order traversal. -- Suitable for lazy use. foldr :: (Prefix a -> b -> b) -> b -> STree a -> b -- | O(t). Folds the edges in a tree, using pre-order traversal. The -- step function is evaluated strictly. foldl :: (a -> Prefix b -> a) -> a -> STree b -> a -- | O(t). Generic fold over a tree. -- -- A few simple examples. -- --
--   countLeaves == fold id id (const const) (1+) 0
--   
-- --
--   countEdges = fold id id (\_ a _ -> a+1) id 0
--   
-- -- This more complicated example generates a tree of the same shape, but -- new type, with annotated leaves. -- --
--   data GenTree a b = GenNode [(Prefix a, GenTree a b)]
--                    | GenLeaf b
--                      deriving (Show)
--   
-- --
--   gentree :: STree a -> GenTree a Int
--   gentree = fold reset id fprefix reset leaf
--       where leaf = GenLeaf 1
--             reset = const leaf
--             fprefix p t (GenLeaf _) = GenNode [(p, t)]
--             fprefix p t (GenNode es) = GenNode ((p, t):es)
--   
fold :: (a -> a) -> (a -> a) -> (Prefix b -> a -> a -> a) -> (a -> a) -> a -> STree b -> a -- | O(1). Construct a Prefix value. mkPrefix :: [a] -> Prefix a -- | O(n). Obtain the list stored in a Prefix. prefix :: Prefix a -> [a] -- | O(n). Returns all non-empty suffixes of the argument, longest -- first. Behaves as follows: -- --
--   suffixes xs == init (tails xs)
--   
suffixes :: [a] -> [[a]] instance Show a => Show (STree a) instance Show a => Show (Length a) instance Functor STree instance Functor Prefix instance Show a => Show (Prefix a) instance Ord a => Ord (Prefix a) instance Eq a => Eq (Prefix a)