{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE MultiWayIf #-}
{-# LANGUAGE ViewPatterns #-}
{-# LANGUAGE Strict #-}
module Data.BWT (
toBWT,
bytestringToBWT,
TextBWT(..),
textToBWT,
fromBWT,
bytestringFromWord8BWT,
bytestringFromByteStringBWT,
textFromBWT
) where
import Data.BWT.Internal
import Control.Monad()
import Control.Monad.ST as CMST
import Control.Monad.State.Strict()
import Data.ByteString as BS (ByteString,concat,pack,unpack)
import Data.Foldable as DFold (toList)
import Data.Sequence as DS (Seq(..),fromList,iterateN,length,unstableSortBy,zip)
import Data.STRef()
import Data.Text (Text)
import Data.Text.Encoding as DTE (decodeUtf8,encodeUtf8)
import Data.Word (Word8)
import GHC.Generics(Generic)
toBWT :: Ord a
=> [a]
-> BWT a
toBWT :: forall a. Ord a => [a] -> BWT a
toBWT [] = forall a. Seq (Maybe a) -> BWT a
BWT forall a. Seq a
DS.Empty
toBWT [a]
xs = do
let saxs :: SuffixArray a
saxs = forall a. Ord a => Seq a -> SuffixArray a
createSuffixArray Seq a
xss
forall a. Seq (Maybe a) -> BWT a
BWT (forall a. SuffixArray a -> Seq a -> Seq (Maybe a)
saToBWT SuffixArray a
saxs
Seq a
xss)
where
xss :: Seq a
xss = forall a. [a] -> Seq a
DS.fromList [a]
xs
bytestringToBWT :: ByteString
-> BWT Word8
bytestringToBWT :: ByteString -> BWT Word8
bytestringToBWT = forall a. Ord a => [a] -> BWT a
toBWT forall b c a. (b -> c) -> (a -> b) -> a -> c
. ByteString -> [Word8]
BS.unpack
newtype TextBWT = TextBWT (BWT Word8)
deriving (TextBWT -> TextBWT -> Bool
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: TextBWT -> TextBWT -> Bool
$c/= :: TextBWT -> TextBWT -> Bool
== :: TextBWT -> TextBWT -> Bool
$c== :: TextBWT -> TextBWT -> Bool
Eq,Eq TextBWT
TextBWT -> TextBWT -> Bool
TextBWT -> TextBWT -> Ordering
TextBWT -> TextBWT -> TextBWT
forall a.
Eq a
-> (a -> a -> Ordering)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> a)
-> (a -> a -> a)
-> Ord a
min :: TextBWT -> TextBWT -> TextBWT
$cmin :: TextBWT -> TextBWT -> TextBWT
max :: TextBWT -> TextBWT -> TextBWT
$cmax :: TextBWT -> TextBWT -> TextBWT
>= :: TextBWT -> TextBWT -> Bool
$c>= :: TextBWT -> TextBWT -> Bool
> :: TextBWT -> TextBWT -> Bool
$c> :: TextBWT -> TextBWT -> Bool
<= :: TextBWT -> TextBWT -> Bool
$c<= :: TextBWT -> TextBWT -> Bool
< :: TextBWT -> TextBWT -> Bool
$c< :: TextBWT -> TextBWT -> Bool
compare :: TextBWT -> TextBWT -> Ordering
$ccompare :: TextBWT -> TextBWT -> Ordering
Ord,Int -> TextBWT -> ShowS
[TextBWT] -> ShowS
TextBWT -> String
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [TextBWT] -> ShowS
$cshowList :: [TextBWT] -> ShowS
show :: TextBWT -> String
$cshow :: TextBWT -> String
showsPrec :: Int -> TextBWT -> ShowS
$cshowsPrec :: Int -> TextBWT -> ShowS
Show,ReadPrec [TextBWT]
ReadPrec TextBWT
Int -> ReadS TextBWT
ReadS [TextBWT]
forall a.
(Int -> ReadS a)
-> ReadS [a] -> ReadPrec a -> ReadPrec [a] -> Read a
readListPrec :: ReadPrec [TextBWT]
$creadListPrec :: ReadPrec [TextBWT]
readPrec :: ReadPrec TextBWT
$creadPrec :: ReadPrec TextBWT
readList :: ReadS [TextBWT]
$creadList :: ReadS [TextBWT]
readsPrec :: Int -> ReadS TextBWT
$creadsPrec :: Int -> ReadS TextBWT
Read,forall x. Rep TextBWT x -> TextBWT
forall x. TextBWT -> Rep TextBWT x
forall a.
(forall x. a -> Rep a x) -> (forall x. Rep a x -> a) -> Generic a
$cto :: forall x. Rep TextBWT x -> TextBWT
$cfrom :: forall x. TextBWT -> Rep TextBWT x
Generic)
textToBWT :: Text
-> TextBWT
textToBWT :: Text -> TextBWT
textToBWT = BWT Word8 -> TextBWT
TextBWT forall b c a. (b -> c) -> (a -> b) -> a -> c
. ByteString -> BWT Word8
bytestringToBWT forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> ByteString
DTE.encodeUtf8
fromBWT :: Ord a
=> BWT a
-> [a]
fromBWT :: forall a. Ord a => BWT a -> [a]
fromBWT BWT a
bwt = do
let originall :: BWTSeq a
originall = forall a. (forall s. ST s a) -> a
CMST.runST forall a b. (a -> b) -> a -> b
$ forall a s. Seq (Maybe a, Int) -> ST s (BWTSeq a)
magicInverseBWT Seq (Maybe a, Int)
magicsz
forall (t :: * -> *) a. Foldable t => t a -> [a]
DFold.toList BWTSeq a
originall
where
magicsz :: Seq (Maybe a, Int)
magicsz = forall a. (a -> a -> Ordering) -> Seq a -> Seq a
DS.unstableSortBy (\(Maybe a
a,Int
b) (Maybe a
c,Int
d) -> forall a1 a2. (Ord a1, Ord a2) => (a1, a2) -> (a1, a2) -> Ordering
sortTB (Maybe a
a,Int
b) (Maybe a
c,Int
d))
Seq (Maybe a, Int)
zipped
zipped :: Seq (Maybe a, Int)
zipped = forall a b. Seq a -> Seq b -> Seq (a, b)
DS.zip Seq (Maybe a)
bwtt
(forall a. Int -> (a -> a) -> a -> Seq a
DS.iterateN (forall a. Seq a -> Int
DS.length Seq (Maybe a)
bwtt) (forall a. Num a => a -> a -> a
+Int
1) Int
0)
bwtt :: Seq (Maybe a)
bwtt = (\(BWT Seq (Maybe a)
t) -> Seq (Maybe a)
t) BWT a
bwt
bytestringFromWord8BWT :: BWT Word8
-> ByteString
bytestringFromWord8BWT :: BWT Word8 -> ByteString
bytestringFromWord8BWT = [Word8] -> ByteString
BS.pack forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. Ord a => BWT a -> [a]
fromBWT
bytestringFromByteStringBWT :: BWT ByteString
-> ByteString
bytestringFromByteStringBWT :: BWT ByteString -> ByteString
bytestringFromByteStringBWT = [ByteString] -> ByteString
BS.concat forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. Ord a => BWT a -> [a]
fromBWT
textFromBWT :: TextBWT
-> Text
textFromBWT :: TextBWT -> Text
textFromBWT (TextBWT BWT Word8
x) = ByteString -> Text
DTE.decodeUtf8 forall a b. (a -> b) -> a -> b
$
BWT Word8 -> ByteString
bytestringFromWord8BWT BWT Word8
x