-- Hoogle documentation, generated by Haddock -- See Hoogle, http://www.haskell.org/hoogle/ -- | Simple and moderately efficient suffix array implementation -- -- A simple implementation of a suffix array, with longest-common-prefix -- array. While not asymptotically optimal, performs well in practice for -- medium use. @package suffix-array @version 0.3.0.0 -- | Internal implementation details, unstable and not to be relied upon -- for any reason. module Data.SuffixArray.Internal -- | A character in a string (or set of strings) we're going to compute the -- suffix array of. Includes Sentinal markers for the end of -- strings. data Alpha a -- | Used to mark the end of a string. The Int parameter is used to -- encode which string this is the end of, in cases where there are -- multiple. Sentinal :: Int -> Alpha a -- | An actual character in the string. Alpha :: a -> Alpha a -- | A naively implemented suffix array implementation which will be used -- for correctness checking and possibly to benchmark against. Shouldn't -- usually be used in production code, as it is quite slow in the worst -- case. In cases with few identical suffixes, it can actually perform -- quite well. See benchmarks for some details. -- -- worst case O(n^2 lg n) time (where n is the sum of the string lengths -- + the number of strings) naive :: Ord a => [[a]] -> [Int] -- | Convenience wrapper around naive for a single string. -- -- worst case O(n^2 lg n) time (where n is the length of the string) naiveOne :: Ord a => [a] -> [Int] -- | A naively implemented LCP implementation, used for correctness testing -- the actual algorithm. -- -- The Longest Common Prefix list gives the longest common prefix of each -- suffix and the previous suffix, with the suffixes in lexicographic -- order. -- -- worst case O(n^2 lg n) time (where n is the sum of the string lengths -- + the number of strings) -- -- The LCP part is an extra O(n^2) in addition to the work of computing -- the suffix array in the first place. naiveLcp :: Ord a => [[a]] -> [Int] -- | Convenience wrapper around naiveLcp for a single string. -- -- worst case O(n^2 lg n) time (where n is the length of the string) -- -- The LCP part is an extra O(n^2) in addition to the work of computing -- the suffix array in the first place. naiveLcpOne :: Ord a => [a] -> [Int] -- | Prepare a list of strings to compute the suffix array of them. Just -- wraps every character in Alpha and adds Sentinals to the -- end of each string, and concatenates it together. prepare :: [[a]] -> [Alpha a] -- | Convenience function to prepare a single string. prepareOne :: [a] -> [Alpha a] -- | Take a sorted list of elements and replace each value with an -- Int such that any comparisons between elements in the original -- list would yield exactly the same result in the output list. -- -- i.e.: let rs = rank xs in all [ (xs!!i) compare (xs!!j) == -- (rs!!i) compare (rs!!j) | let idx = [0 .. length xs - 1], i -- <- idx, j <- idx ] rank :: Ord a => [a] -> [Int] -- | Yields the non-empty suffixes of a list in order of decreasing length. -- -- This differs from tails in that it does not include the empty -- list at the end. suffixes :: [a] -> [[a]] instance GHC.Show.Show a => GHC.Show.Show (Data.SuffixArray.Internal.Alpha a) instance GHC.Classes.Ord a => GHC.Classes.Ord (Data.SuffixArray.Internal.Alpha a) instance GHC.Classes.Eq a => GHC.Classes.Eq (Data.SuffixArray.Internal.Alpha a) -- | Suffix array library main module module Data.SuffixArray -- | Holds the computed suffix array data data SuffixArray a SuffixArray :: UArray Int Int -> Array Int (Alpha a) -> UArray Int Int -> SuffixArray a -- | The actual array of suffixes in lexicographic order. [toSuffixes] :: SuffixArray a -> UArray Int Int -- | The original string(s) with Sentinal values included after each -- string. [toAlphas] :: SuffixArray a -> Array Int (Alpha a) -- | Longest Common Prefix of each suffix with the previous one in -- lexicographic order [toLcp] :: SuffixArray a -> UArray Int Int -- | Compute the suffix array of the given string(s) concatenated together -- with Sentinals after each. -- -- worst case O(n lg n) time (where n is the sum of the string lengths + -- the number of strings) suffixArray :: Ord a => [[a]] -> SuffixArray a -- | Convenience function to compute the suffix array of a single string. -- (Still gets a Sentinal at the end) -- -- worst case O(n lg n) time (where n is the length of the string) suffixArrayOne :: Ord a => [a] -> SuffixArray a -- | A character in a string (or set of strings) we're going to compute the -- suffix array of. Includes Sentinal markers for the end of -- strings. data Alpha a -- | Used to mark the end of a string. The Int parameter is used to -- encode which string this is the end of, in cases where there are -- multiple. Sentinal :: Int -> Alpha a -- | An actual character in the string. Alpha :: a -> Alpha a -- | Convenience function to just give a list characters in the -- concatenated original strings. justAlphas :: SuffixArray a -> [Alpha a] -- | Convenience function to just give a list of the longest common prefix -- of every suffix with the previous suffix in lexicographic order. justLcp :: SuffixArray a -> [Int] -- | Convenience function to just give a list of the suffixes in -- lexicographic order. justSuffixes :: SuffixArray a -> [Int] instance GHC.Show.Show a => GHC.Show.Show (Data.SuffixArray.SuffixArray a) instance GHC.Classes.Ord a => GHC.Classes.Ord (Data.SuffixArray.SuffixArray a) instance GHC.Classes.Eq a => GHC.Classes.Eq (Data.SuffixArray.SuffixArray a)