-- Hoogle documentation, generated by Haddock
-- See Hoogle, http://www.haskell.org/hoogle/
-- | Simple and moderately efficient suffix array implementation
--
-- A simple implementation of a suffix array, with longest-common-prefix
-- array. While not asymptotically optimal, performs well in practice for
-- medium use.
@package suffix-array
@version 0.3.0.0
-- | Internal implementation details, unstable and not to be relied upon
-- for any reason.
module Data.SuffixArray.Internal
-- | A character in a string (or set of strings) we're going to compute the
-- suffix array of. Includes Sentinal markers for the end of
-- strings.
data Alpha a
-- | Used to mark the end of a string. The Int parameter is used to
-- encode which string this is the end of, in cases where there are
-- multiple.
Sentinal :: Int -> Alpha a
-- | An actual character in the string.
Alpha :: a -> Alpha a
-- | A naively implemented suffix array implementation which will be used
-- for correctness checking and possibly to benchmark against. Shouldn't
-- usually be used in production code, as it is quite slow in the worst
-- case. In cases with few identical suffixes, it can actually perform
-- quite well. See benchmarks for some details.
--
-- worst case O(n^2 lg n) time (where n is the sum of the string lengths
-- + the number of strings)
naive :: Ord a => [[a]] -> [Int]
-- | Convenience wrapper around naive for a single string.
--
-- worst case O(n^2 lg n) time (where n is the length of the string)
naiveOne :: Ord a => [a] -> [Int]
-- | A naively implemented LCP implementation, used for correctness testing
-- the actual algorithm.
--
-- The Longest Common Prefix list gives the longest common prefix of each
-- suffix and the previous suffix, with the suffixes in lexicographic
-- order.
--
-- worst case O(n^2 lg n) time (where n is the sum of the string lengths
-- + the number of strings)
--
-- The LCP part is an extra O(n^2) in addition to the work of computing
-- the suffix array in the first place.
naiveLcp :: Ord a => [[a]] -> [Int]
-- | Convenience wrapper around naiveLcp for a single string.
--
-- worst case O(n^2 lg n) time (where n is the length of the string)
--
-- The LCP part is an extra O(n^2) in addition to the work of computing
-- the suffix array in the first place.
naiveLcpOne :: Ord a => [a] -> [Int]
-- | Prepare a list of strings to compute the suffix array of them. Just
-- wraps every character in Alpha and adds Sentinals to the
-- end of each string, and concatenates it together.
prepare :: [[a]] -> [Alpha a]
-- | Convenience function to prepare a single string.
prepareOne :: [a] -> [Alpha a]
-- | Take a sorted list of elements and replace each value with an
-- Int such that any comparisons between elements in the original
-- list would yield exactly the same result in the output list.
--
-- i.e.: let rs = rank xs in all [ (xs!!i) compare (xs!!j) ==
-- (rs!!i) compare (rs!!j) | let idx = [0 .. length xs - 1], i
-- <- idx, j <- idx ]
rank :: Ord a => [a] -> [Int]
-- | Yields the non-empty suffixes of a list in order of decreasing length.
--
-- This differs from tails in that it does not include the empty
-- list at the end.
suffixes :: [a] -> [[a]]
instance GHC.Show.Show a => GHC.Show.Show (Data.SuffixArray.Internal.Alpha a)
instance GHC.Classes.Ord a => GHC.Classes.Ord (Data.SuffixArray.Internal.Alpha a)
instance GHC.Classes.Eq a => GHC.Classes.Eq (Data.SuffixArray.Internal.Alpha a)
-- | Suffix array library main module
module Data.SuffixArray
-- | Holds the computed suffix array data
data SuffixArray a
SuffixArray :: UArray Int Int -> Array Int (Alpha a) -> UArray Int Int -> SuffixArray a
-- | The actual array of suffixes in lexicographic order.
[toSuffixes] :: SuffixArray a -> UArray Int Int
-- | The original string(s) with Sentinal values included after each
-- string.
[toAlphas] :: SuffixArray a -> Array Int (Alpha a)
-- | Longest Common Prefix of each suffix with the previous one in
-- lexicographic order
[toLcp] :: SuffixArray a -> UArray Int Int
-- | Compute the suffix array of the given string(s) concatenated together
-- with Sentinals after each.
--
-- worst case O(n lg n) time (where n is the sum of the string lengths +
-- the number of strings)
suffixArray :: Ord a => [[a]] -> SuffixArray a
-- | Convenience function to compute the suffix array of a single string.
-- (Still gets a Sentinal at the end)
--
-- worst case O(n lg n) time (where n is the length of the string)
suffixArrayOne :: Ord a => [a] -> SuffixArray a
-- | A character in a string (or set of strings) we're going to compute the
-- suffix array of. Includes Sentinal markers for the end of
-- strings.
data Alpha a
-- | Used to mark the end of a string. The Int parameter is used to
-- encode which string this is the end of, in cases where there are
-- multiple.
Sentinal :: Int -> Alpha a
-- | An actual character in the string.
Alpha :: a -> Alpha a
-- | Convenience function to just give a list characters in the
-- concatenated original strings.
justAlphas :: SuffixArray a -> [Alpha a]
-- | Convenience function to just give a list of the longest common prefix
-- of every suffix with the previous suffix in lexicographic order.
justLcp :: SuffixArray a -> [Int]
-- | Convenience function to just give a list of the suffixes in
-- lexicographic order.
justSuffixes :: SuffixArray a -> [Int]
instance GHC.Show.Show a => GHC.Show.Show (Data.SuffixArray.SuffixArray a)
instance GHC.Classes.Ord a => GHC.Classes.Ord (Data.SuffixArray.SuffixArray a)
instance GHC.Classes.Eq a => GHC.Classes.Eq (Data.SuffixArray.SuffixArray a)