-- Hoogle documentation, generated by Haddock -- See Hoogle, http://www.haskell.org/hoogle/ -- | A text compression library. -- -- This package contains efficient implementations of various text -- compression algorithms. @package text-compression @version 0.1.0.8 -- |

WARNING

-- -- This module is considered internal. -- -- The Package Versioning Policy does not apply. -- -- The contents of this module may change in any way whatsoever -- and without any warning between minor versions of this package. -- -- Authors importing this library are expected to track development -- closely. -- -- All credit goes to the author(s)/maintainer(s) of the -- containers library for the above warning text. -- --

Description

-- -- Various data structures and custom data types to describe the -- Burrows-Wheeler Transform (BWT) and the Inverse BWT. -- -- The implementation of the BWT relies upon Boxed vectors, -- Vector, and Unboxed vectors, Vector, provided by the -- vector. -- -- The internal BWTMatrix data type relies upon the massiv -- package. module Data.BWT.Internal -- | Basic suffix data type. Used to describe the core data inside of the -- SuffixArray data type. data Suffix a Suffix :: Int -> Int -> Maybe (Vector a) -> Suffix a [suffixindex] :: Suffix a -> Int [suffixstartpos] :: Suffix a -> Int [suffix] :: Suffix a -> Maybe (Vector a) -- | The SuffixArray data type. Uses Vector internally. type SuffixArray a = Vector (Suffix a) -- | The BWT data type. Uses Vector internally. newtype BWT a BWT :: Vector (Maybe a) -> BWT a -- | The BWTMatrix data type. Uses a massiv array internally. type BWTMatrix = Array BN Ix1 String -- | Computes the Burrows-Wheeler Transform (BWT) using the suffix array -- and the original string (represented as a Vector for -- performance). saToBWT :: Unbox a => SuffixArray a -> Vector a -> BWT a -- | Vector based implementation of the well-known tails function in -- the List library. tailsV :: Unbox a => Vector a -> [Vector a] -- | Custom sort function for Vectors used in the -- createSuffixArray function. sortVecSA :: Ord a => Vector (Int, a) -> Vector (Int, a) -- | Custom sort function for Vectors used in the fromBWT function. sortVecBWT :: Ord a => Vector (a, Int) -> Vector (a, Int) -- | Computes the corresponding SuffixArray of a given string. -- Please see suffix array for more information. createSuffixArray :: (Unbox a, Ord a) => Vector a -> SuffixArray a -- | Hierarchical sorting scheme that compares fst first then snd. -- Necessary for the setting up the BWT in order to correctly invert it -- using the Magic algorithm. sortTB :: (Ord a1, Ord a2) => (a1, a2) -> (a1, a2) -> Ordering -- | Abstract BWTSeq type utilizing a sequence. type BWTVec a = Vector a -- | Abstract data type representing a BWTVec in the (strict) ST -- monad. type STBWTVec s a = STRef s (BWTVec a) -- | State function to push BWTVec data into stack. pushSTBWTVec :: STBWTVec s a -> a -> ST s () -- | State function to create empty STBWTVec type. emptySTBWTVec :: ST s (STBWTVec s a) -- | Abstract BWTCounter and associated state type. type STBWTCounter s a = STRef s Int -- | State function to update BWTCounter. updateSTBWTCounter :: STBWTCounter s Int -> Int -> ST s () -- | State function to create empty STBWTCounter type. emptySTBWTCounter :: ST s (STBWTCounter s Int) -- | Magic Inverse BWT function. magicInverseBWT :: Vector (Maybe a, Int) -> ST s (BWTVec a) -- | Simple yet efficient implementation of converting a given string into -- a BWT Matrix (the BWTMatrix type is a massiv array). createBWTMatrix :: String -> BWTMatrix instance GHC.Generics.Generic (Data.BWT.Internal.Suffix a) instance (Data.Vector.Unboxed.Base.Unbox a, GHC.Classes.Ord a) => GHC.Classes.Ord (Data.BWT.Internal.Suffix a) instance (Data.Vector.Unboxed.Base.Unbox a, GHC.Classes.Eq a) => GHC.Classes.Eq (Data.BWT.Internal.Suffix a) instance (GHC.Read.Read a, Data.Vector.Unboxed.Base.Unbox a) => GHC.Read.Read (Data.BWT.Internal.Suffix a) instance (GHC.Show.Show a, Data.Vector.Unboxed.Base.Unbox a) => GHC.Show.Show (Data.BWT.Internal.Suffix a) instance GHC.Generics.Generic (Data.BWT.Internal.BWT a) instance GHC.Read.Read a => GHC.Read.Read (Data.BWT.Internal.BWT a) instance GHC.Show.Show a => GHC.Show.Show (Data.BWT.Internal.BWT a) instance GHC.Classes.Ord a => GHC.Classes.Ord (Data.BWT.Internal.BWT a) instance GHC.Classes.Eq a => GHC.Classes.Eq (Data.BWT.Internal.BWT a) -- |

Burrows-Wheeler Transform (BWT)

-- -- The two functions that most users will utilize are toBWT and -- fromBWT. There are auxilary function(s) inside of -- Data.BWT.Internal. -- -- The helper functions for ByteString, bytestringToBWT, -- bytestringFromWord8BWT , bytestringFromByteStringBWT and -- Text, textToBWT and textFromBWT should help for common -- use cases. -- -- Data.BWT.Internal also has the function -- createBWTMatrix, which can be useful as well, although not used -- by either toBWT or fromBWT. module Data.BWT -- | Takes a String and returns the Burrows-Wheeler Transform (BWT). -- Implemented via a SuffixArray. toBWT :: (Unbox a, Ord a) => [a] -> BWT a -- | Helper function for converting a ByteString to a BWT -- Word8. bytestringToBWT :: ByteString -> BWT Word8 -- | A newtype to ensure you only uncompress a BWT created from textToBWT, -- since [Word8] -> Text is partial. newtype TextBWT TextBWT :: BWT Word8 -> TextBWT -- | Helper function for converting Text to a TextBWT. textToBWT :: Text -> TextBWT -- | Takes a BWT data type (please see Data.BWT.Internal) -- and inverts it back to the original string. -- -- This function utilizes the state monad (strict) in order to implement -- the Magic Inverse BWT algorithm by backtracking indices -- starting with the (Nothing,_) entry. fromBWT :: Ord a => BWT a -> [a] -- | Helper function for converting a BWT of Word8s to a -- ByteString. bytestringFromWord8BWT :: BWT Word8 -> ByteString -- | Helper function for converting a BWT ByteStrings to a -- ByteString. bytestringFromByteStringBWT :: BWT ByteString -> ByteString -- | Helper function for converting TextBWT to a Text textFromBWT :: TextBWT -> Text instance GHC.Generics.Generic Data.BWT.TextBWT instance GHC.Read.Read Data.BWT.TextBWT instance GHC.Show.Show Data.BWT.TextBWT instance GHC.Classes.Ord Data.BWT.TextBWT instance GHC.Classes.Eq Data.BWT.TextBWT -- |

WARNING

-- -- This module is considered internal. -- -- The Package Versioning Policy does not apply. -- -- The contents of this module may change in any way whatsoever -- and without any warning between minor versions of this package. -- -- Authors importing this library are expected to track development -- closely. -- -- All credit goes to the author(s)/maintainer(s) of the -- containers library for the above warning text. -- --

Description

-- -- Various data structures and custom data types to describe the -- Run-length encoding (RLE) and the Inverse RLE implementations, namely -- vecToRLEB, vecToRLET, vecFromRLEB, and -- vecFromRLET. -- -- The RLE implementations rely heavily upon Vector provided by -- the vector library, STRef and associated functions in -- the stref library, and runST in the -- Control.Monad.ST library. module Data.RLE.Internal -- | Basic RLE (ByteString) data type. newtype RLEB RLEB :: Vector (Maybe ByteString) -> RLEB -- | Basic RLE (Text) data type. newtype RLET RLET :: Vector (Maybe Text) -> RLET -- | Abstract RLEVecB type utilizing a sequence. type RLEVecB = Vector (Maybe ByteString) -- | Abstract data type representing a RLEVecB in the (strict) ST -- monad. type STRLEVecB s a = STRef s RLEVecB -- | State function to push RLEVecB data into stack. pushSTRLEVecB :: STRLEVecB s (Maybe ByteString) -> Maybe ByteString -> ST s () -- | State function to create empty STRLEVecB type. emptySTRLEVecB :: ST s (STRLEVecB s a) -- | Abstract STRLETempB and associated state type. type STRLETempB s a = STRef s (Maybe ByteString) -- | State function to update STRLETempB. updateSTRLETempB :: STRLETempB s (Maybe ByteString) -> Maybe ByteString -> ST s () -- | State function to create empty STRLETempB type. emptySTRLETempB :: ST s (STRLETempB s a) -- | Abstract STRLECounterB state type. type STRLECounterB s a = STRef s Int -- | State function to update STRLECounterB. updateSTRLECounterB :: STRLECounterB s Int -> Int -> ST s () -- | State function to create empty STRLECounterB type. emptySTRLECounterB :: ST s (STRLECounterB s Int) -- | Strict state monad function. vecToRLEB :: RLEVecB -> ST s RLEVecB -- | Abstract RLEVecT type utilizing a sequence. type RLEVecT = Vector (Maybe Text) -- | Abstract data type representing a RLEVecT in the (strict) ST -- monad. type STRLEVecT s a = STRef s RLEVecT -- | State function to push RLEVecT data into stack. pushSTRLEVecT :: STRLEVecT s (Maybe Text) -> Maybe Text -> ST s () -- | State function to create empty STRLEVecT type. emptySTRLEVecT :: ST s (STRLEVecT s a) -- | Abstract STRLETempT state type. type STRLETempT s a = STRef s (Maybe Text) -- | State function to update STRLETempT. updateSTRLETempT :: STRLETempT s (Maybe Text) -> Maybe Text -> ST s () -- | State function to create empty STRLETempT type. emptySTRLETempT :: ST s (STRLETempT s a) -- | Abstract STRLECounterT and associated state type. type STRLECounterT s a = STRef s Int -- | State function to update STRLECounterT. updateSTRLECounterT :: STRLECounterT s Int -> Int -> ST s () -- | State function to create empty STRLECounterT type. emptySTRLECounterT :: ST s (STRLECounterT s Int) -- | Strict state monad function. vecToRLET :: RLEVecT -> ST s RLEVecT -- | Vector auxilary function to pattern match on first two elements -- of a vector. unconsb2 :: Vector a -> Maybe (a, Vector a, Maybe (Vector a)) -- | Abstract FRLEVecB type utilizing a sequence. type FRLEVecB = Vector (Maybe ByteString) -- | Abstract data type representing a FRLEVecB in the (strict) ST -- monad. type FSTRLEVecB s a = STRef s FRLEVecB -- | State function to push FRLEVecB data into stack. pushFSTRLEVecB :: FSTRLEVecB s (Maybe ByteString) -> Maybe ByteString -> ST s () -- | State function to create empty FSTRLEVecB type. emptyFSTRLEVecB :: ST s (FSTRLEVecB s a) -- | Strict state monad function. vecFromRLEB :: RLEB -> ST s FRLEVecB -- | Vector auxilary function to pattern match on first two elements -- of a vector. unconst2 :: Vector a -> Maybe (a, Vector a, Maybe (Vector a)) -- | Abstract FRLEVecT type utilizing a sequence. type FRLEVecT = Vector (Maybe Text) -- | Abstract data type representing a FRLEVecT in the (strict) ST -- monad. type FSTRLEVecT s a = STRef s FRLEVecT -- | State function to push FSTRLEVecT data into stack. pushFSTRLEVecT :: FSTRLEVecT s (Maybe Text) -> Maybe Text -> ST s () -- | State function to create empty FSTRLEVecT type. emptyFSTRLEVecT :: ST s (FSTRLEVecT s a) -- | Strict state monad function. vecFromRLET :: RLET -> ST s FRLEVecT instance GHC.Generics.Generic Data.RLE.Internal.RLEB instance GHC.Read.Read Data.RLE.Internal.RLEB instance GHC.Show.Show Data.RLE.Internal.RLEB instance GHC.Classes.Ord Data.RLE.Internal.RLEB instance GHC.Classes.Eq Data.RLE.Internal.RLEB instance GHC.Generics.Generic Data.RLE.Internal.RLET instance GHC.Read.Read Data.RLE.Internal.RLET instance GHC.Show.Show Data.RLE.Internal.RLET instance GHC.Classes.Ord Data.RLE.Internal.RLET instance GHC.Classes.Eq Data.RLE.Internal.RLET -- |

Run-length encoding (RLE)

module Data.RLE -- | Helper function for converting a ByteString to a RLEB -- via a BWT first. bytestringToBWTToRLEB :: ByteString -> RLEB -- | Helper function for converting a ByteString to a RLET -- via a BWT first. bytestringToBWTToRLET :: ByteString -> RLET -- | Helper function for converting a Text to a RLEB via a -- BWT first. textToBWTToRLEB :: Text -> RLEB -- | Helper function for converting a Text to a RLET via a -- BWT first. textToBWTToRLET :: Text -> RLET -- | Take a BWT of Word8s and generate the Run-length -- encoding (RLEB). textBWTToRLEB :: TextBWT -> RLEB -- | Take a BWT of Word8s and generate the Run-length -- encoding (RLEB). bytestringBWTToRLEB :: BWT Word8 -> RLEB -- | Take a BWT of Word8s and generate the Run-length -- encoding (RLEB). textBWTToRLET :: TextBWT -> RLET -- | Take a BWT of Word8s and generate the Run-length -- encoding (RLET). bytestringBWTToRLET :: BWT Word8 -> RLET -- | Takes a Text and returns the Run-length encoding (RLEB). textToRLEB :: Vector (Maybe Text) -> RLEB -- | Takes a Vector of ByteStrings and returns the Run-length -- encoding (RLEB). bytestringToRLEB :: Vector (Maybe ByteString) -> RLEB -- | Takes a Text and returns the Run-length encoding (RLE). textToRLET :: Vector (Maybe Text) -> RLET -- | Takes a ByteString and returns the Run-length encoding (RLE). bytestringToRLET :: Vector (Maybe ByteString) -> RLET -- | Helper function for converting a BWTed RLEB back to the -- original ByteString. bytestringFromBWTFromRLEB :: RLEB -> ByteString -- | Helper function for converting a BWTed RLET back to the -- original ByteString. bytestringFromBWTFromRLET :: RLET -> ByteString -- | Helper function for converting a BWTed RLEB back to the -- original Text. textFromBWTFromRLEB :: RLEB -> Text -- | Helper function for converting a BWTed RLET back to the -- original Text. textFromBWTFromRLET :: RLET -> Text -- | Takes a RLET and returns the BWT of Texts. textBWTFromRLET :: RLET -> BWT Text -- | Takes a RLET and returns the BWT of ByteStrings. bytestringBWTFromRLET :: RLET -> BWT ByteString -- | Takes a RLEB and returns the BWT of Texts. textBWTFromRLEB :: RLEB -> BWT Text -- | Take a RLEB and returns the BWT of ByteStrings. bytestringBWTFromRLEB :: RLEB -> BWT ByteString -- | Takes a RLEB and returns the original Vector of -- Texts. textFromRLEB :: RLEB -> Vector (Maybe Text) -- | Takes a RLEB and returns the original Vector of -- ByteStrings. bytestringFromRLEB :: RLEB -> Vector (Maybe ByteString) -- | Takes a RLET and returns the original Vector of -- Texts. textFromRLET :: RLET -> Vector (Maybe Text) -- | Takes a RLET and returns the original Vector of -- ByteStrings. bytestringFromRLET :: RLET -> Vector (Maybe ByteString)