{-# LANGUAGE MultiWayIf #-}
{-# LANGUAGE ViewPatterns #-}
{-# LANGUAGE Strict #-}
{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE TypeApplications #-}
module Data.BWT.Internal where
import Control.Monad as CM
import Control.Monad.ST as CMST
import Control.Monad.State.Strict()
import Data.Foldable as DFold
import Data.List as DL
import Data.Sequence as DS
import Data.Massiv.Array as DMA
import Data.Massiv.Core()
import Data.STRef as DSTR
import GHC.Generics
import Prelude as P
data Suffix = Suffix { Suffix -> Int
suffixindex :: Int
, Suffix -> Int
suffixstartpos :: Int
, Suffix -> Seq Char
suffix :: Seq Char
}
deriving (Int -> Suffix -> ShowS
[Suffix] -> ShowS
Suffix -> String
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [Suffix] -> ShowS
$cshowList :: [Suffix] -> ShowS
show :: Suffix -> String
$cshow :: Suffix -> String
showsPrec :: Int -> Suffix -> ShowS
$cshowsPrec :: Int -> Suffix -> ShowS
Show,ReadPrec [Suffix]
ReadPrec Suffix
Int -> ReadS Suffix
ReadS [Suffix]
forall a.
(Int -> ReadS a)
-> ReadS [a] -> ReadPrec a -> ReadPrec [a] -> Read a
readListPrec :: ReadPrec [Suffix]
$creadListPrec :: ReadPrec [Suffix]
readPrec :: ReadPrec Suffix
$creadPrec :: ReadPrec Suffix
readList :: ReadS [Suffix]
$creadList :: ReadS [Suffix]
readsPrec :: Int -> ReadS Suffix
$creadsPrec :: Int -> ReadS Suffix
Read,Suffix -> Suffix -> Bool
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: Suffix -> Suffix -> Bool
$c/= :: Suffix -> Suffix -> Bool
== :: Suffix -> Suffix -> Bool
$c== :: Suffix -> Suffix -> Bool
Eq,Eq Suffix
Suffix -> Suffix -> Bool
Suffix -> Suffix -> Ordering
Suffix -> Suffix -> Suffix
forall a.
Eq a
-> (a -> a -> Ordering)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> a)
-> (a -> a -> a)
-> Ord a
min :: Suffix -> Suffix -> Suffix
$cmin :: Suffix -> Suffix -> Suffix
max :: Suffix -> Suffix -> Suffix
$cmax :: Suffix -> Suffix -> Suffix
>= :: Suffix -> Suffix -> Bool
$c>= :: Suffix -> Suffix -> Bool
> :: Suffix -> Suffix -> Bool
$c> :: Suffix -> Suffix -> Bool
<= :: Suffix -> Suffix -> Bool
$c<= :: Suffix -> Suffix -> Bool
< :: Suffix -> Suffix -> Bool
$c< :: Suffix -> Suffix -> Bool
compare :: Suffix -> Suffix -> Ordering
$ccompare :: Suffix -> Suffix -> Ordering
Ord,forall x. Rep Suffix x -> Suffix
forall x. Suffix -> Rep Suffix x
forall a.
(forall x. a -> Rep a x) -> (forall x. Rep a x -> a) -> Generic a
$cto :: forall x. Rep Suffix x -> Suffix
$cfrom :: forall x. Suffix -> Rep Suffix x
Generic)
type SuffixArray = Seq Suffix
type BWT = Seq Char
type BWTMatrix = DMA.Array BN Ix1 String
saToBWT :: SuffixArray -> Seq Char -> BWT
saToBWT :: SuffixArray -> Seq Char -> Seq Char
saToBWT SuffixArray
DS.Empty Seq Char
_ = forall a. Seq a
DS.Empty
saToBWT (Suffix
y DS.:<| SuffixArray
ys) Seq Char
t =
if | Suffix -> Int
suffixstartpos Suffix
y forall a. Eq a => a -> a -> Bool
/= Int
1
-> forall a. Seq a -> Int -> a
DS.index Seq Char
t (Suffix -> Int
suffixstartpos Suffix
y forall a. Num a => a -> a -> a
- Int
1 forall a. Num a => a -> a -> a
- Int
1)
forall a. a -> Seq a -> Seq a
DS.<| (SuffixArray -> Seq Char -> Seq Char
saToBWT SuffixArray
ys Seq Char
t)
| Bool
otherwise
-> forall a. Seq a -> Int -> a
DS.index Seq Char
t (forall a. Seq a -> Int
DS.length Seq Char
t forall a. Num a => a -> a -> a
- Int
1)
forall a. a -> Seq a -> Seq a
DS.<| (SuffixArray -> Seq Char -> Seq Char
saToBWT SuffixArray
ys Seq Char
t)
createSuffixArray :: Seq Char -> SuffixArray
createSuffixArray :: Seq Char -> SuffixArray
createSuffixArray Seq Char
xs =
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap (\(Int, Int, Seq Char)
x -> Suffix { suffixindex :: Int
suffixindex = ((\(Int
a,Int
_,Seq Char
_) -> Int
a) (Int, Int, Seq Char)
x)
, suffixstartpos :: Int
suffixstartpos = ((\(Int
_,Int
b,Seq Char
_) -> Int
b) (Int, Int, Seq Char)
x)
, suffix :: Seq Char
suffix = ((\(Int
_,Int
_,Seq Char
c) -> Seq Char
c) (Int, Int, Seq Char)
x)
}
) Seq (Int, Int, Seq Char)
xsssuffixesfff
where
xsssuffixes :: Seq (Seq Char)
xsssuffixes = forall a. Seq a -> Seq (Seq a)
DS.tails Seq Char
xs
xsssuffixesf :: Seq (Int, Seq Char)
xsssuffixesf = forall a b. Seq a -> Seq b -> Seq (a, b)
DS.zip (forall a. [a] -> Seq a
DS.fromList [Int
1..(forall a. Seq a -> Int
DS.length Seq (Seq Char)
xsssuffixes)])
Seq (Seq Char)
xsssuffixes
xsssuffixesff :: Seq (Int, Seq Char)
xsssuffixesff = forall a. (a -> Bool) -> Seq a -> Seq a
DS.filter (\(Int
_,Seq Char
b) -> Bool -> Bool
not forall a b. (a -> b) -> a -> b
$ forall a. Seq a -> Bool
DS.null Seq Char
b)
Seq (Int, Seq Char)
xsssuffixesf
xsssuffixesffsorted :: Seq (Int, Seq Char)
xsssuffixesffsorted = forall b a. Ord b => (a -> b) -> Seq a -> Seq a
DS.sortOn forall a b. (a, b) -> b
snd Seq (Int, Seq Char)
xsssuffixesff
xsssuffixesfff :: Seq (Int, Int, Seq Char)
xsssuffixesfff = (\(Int
a,(Int
b,Seq Char
c)) -> (Int
a,Int
b,Seq Char
c))
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$>
forall a b. Seq a -> Seq b -> Seq (a, b)
DS.zip (forall a. [a] -> Seq a
DS.fromList [Int
1..(forall a. Seq a -> Int
DS.length Seq (Int, Seq Char)
xsssuffixesffsorted)])
Seq (Int, Seq Char)
xsssuffixesffsorted
sortTB :: (Ord a1, Ord a2) => (a1, a2) -> (a1, a2) -> Ordering
sortTB :: forall a1 a2. (Ord a1, Ord a2) => (a1, a2) -> (a1, a2) -> Ordering
sortTB (a1
c1,a2
i1) (a1
c2,a2
i2) = forall a. Ord a => a -> a -> Ordering
compare a1
c1 a1
c2 forall a. Semigroup a => a -> a -> a
<>
forall a. Ord a => a -> a -> Ordering
compare a2
i1 a2
i2
type BWTSeq a = Seq Char
type STBWTSeq s a = STRef s (BWTSeq Char)
pushSTBWTSeq :: STBWTSeq s Char -> Char -> ST s ()
pushSTBWTSeq :: forall s. STBWTSeq s Char -> Char -> ST s ()
pushSTBWTSeq STBWTSeq s Char
s Char
e = do
Seq Char
s2 <- forall s a. STRef s a -> ST s a
readSTRef STBWTSeq s Char
s
forall s a. STRef s a -> a -> ST s ()
writeSTRef STBWTSeq s Char
s (Seq Char
s2 forall a. Seq a -> a -> Seq a
DS.|> Char
e)
emptySTBWTSeq :: ST s (STBWTSeq s Char)
emptySTBWTSeq :: forall s. ST s (STBWTSeq s Char)
emptySTBWTSeq = forall a s. a -> ST s (STRef s a)
newSTRef forall a. Seq a
DS.empty
type STBWTCounter s a = STRef s Int
updateSTBWTCounter :: STBWTCounter s Int -> Int -> ST s ()
updateSTBWTCounter :: forall s. STBWTCounter s Int -> Int -> ST s ()
updateSTBWTCounter STBWTCounter s Int
s Int
e = forall s a. STRef s a -> a -> ST s ()
writeSTRef STBWTCounter s Int
s Int
e
emptySTBWTCounter :: ST s (STBWTCounter s Int)
emptySTBWTCounter :: forall s. ST s (STBWTCounter s Int)
emptySTBWTCounter = forall a s. a -> ST s (STRef s a)
newSTRef (-Int
1)
magicInverseBWT :: Seq (Char,Int) -> ST s (BWTSeq Char)
magicInverseBWT :: forall s. Seq (Char, Int) -> ST s (Seq Char)
magicInverseBWT Seq (Char, Int)
DS.Empty = do
STBWTSeq s Char
bwtseqstackempty <- forall s. ST s (STBWTSeq s Char)
emptySTBWTSeq
Seq Char
bwtseqstackemptyr <- forall s a. STRef s a -> ST s a
readSTRef STBWTSeq s Char
bwtseqstackempty
forall (m :: * -> *) a. Monad m => a -> m a
return Seq Char
bwtseqstackemptyr
magicInverseBWT Seq (Char, Int)
xs = do
STBWTSeq s Char
bwtseqstack <- forall s. ST s (STBWTSeq s Char)
emptySTBWTSeq
STBWTCounter s Int
bwtcounterstack <- forall s. ST s (STBWTCounter s Int)
emptySTBWTCounter
case (forall a. (a -> Bool) -> Seq a -> Maybe Int
DS.findIndexL ((forall a. Eq a => a -> a -> Bool
== Char
'$') forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a b. (a, b) -> a
fst) Seq (Char, Int)
xs) of
Maybe Int
Nothing -> do Seq Char
bwtseqstackr <- forall s a. STRef s a -> ST s a
readSTRef STBWTSeq s Char
bwtseqstack
forall (m :: * -> *) a. Monad m => a -> m a
return Seq Char
bwtseqstackr
Just Int
dollarsignindex -> do let dollarsignfirst :: (Char, Int)
dollarsignfirst = forall a. Seq a -> Int -> a
DS.index Seq (Char, Int)
xs
Int
dollarsignindex
forall s. STBWTCounter s Int -> Int -> ST s ()
updateSTBWTCounter STBWTCounter s Int
bwtcounterstack
(forall a b. (a, b) -> b
snd (Char, Int)
dollarsignfirst)
forall {s}.
Seq (Char, Int) -> STRef s (Seq Char) -> STRef s Int -> ST s ()
iBWT Seq (Char, Int)
xs
STBWTSeq s Char
bwtseqstack
STBWTCounter s Int
bwtcounterstack
Seq Char
bwtseqstackr <- forall s a. STRef s a -> ST s a
readSTRef STBWTSeq s Char
bwtseqstack
forall (m :: * -> *) a. Monad m => a -> m a
return Seq Char
bwtseqstackr
where
iBWT :: Seq (Char, Int) -> STRef s (Seq Char) -> STRef s Int -> ST s ()
iBWT Seq (Char, Int)
ys STRef s (Seq Char)
bwtss STRef s Int
bwtcs = do
Int
cbwtcs <- forall s a. STRef s a -> ST s a
readSTRef STRef s Int
bwtcs
Seq Char
cbwtss <- forall s a. STRef s a -> ST s a
readSTRef STRef s (Seq Char)
bwtss
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
CM.when (forall a. Seq a -> Int
DS.length Seq Char
cbwtss forall a. Ord a => a -> a -> Bool
< forall a. Seq a -> Int
DS.length Seq (Char, Int)
ys) forall a b. (a -> b) -> a -> b
$ do
let next :: (Char, Int)
next = forall a. Seq a -> Int -> a
DS.index Seq (Char, Int)
ys Int
cbwtcs
forall s. STBWTSeq s Char -> Char -> ST s ()
pushSTBWTSeq STRef s (Seq Char)
bwtss
(forall a b. (a, b) -> a
fst (Char, Int)
next)
forall s. STBWTCounter s Int -> Int -> ST s ()
updateSTBWTCounter STRef s Int
bwtcs
(forall a b. (a, b) -> b
snd (Char, Int)
next)
Seq (Char, Int) -> STRef s (Seq Char) -> STRef s Int -> ST s ()
iBWT Seq (Char, Int)
ys STRef s (Seq Char)
bwtss STRef s Int
bwtcs
grabHeadChunks :: Seq (Seq Char) -> (Seq Char,Seq Char)
grabHeadChunks :: Seq (Seq Char) -> (Seq Char, Seq Char)
grabHeadChunks Seq (Seq Char)
DS.Empty = (forall a. Seq a
DS.Empty,forall a. Seq a
DS.Empty)
grabHeadChunks (Seq Char
x1 DS.:<| Seq (Seq Char)
xs) = (Seq Char
x1,Seq (Seq Char) -> Seq Char
grabHeadChunksInternal Seq (Seq Char)
xs)
where
grabHeadChunksInternal :: Seq (Seq Char) -> Seq Char
grabHeadChunksInternal :: Seq (Seq Char) -> Seq Char
grabHeadChunksInternal Seq (Seq Char)
DS.Empty = forall a. Seq a
DS.Empty
grabHeadChunksInternal (Seq Char
y1 DS.:<| Seq (Seq Char)
_) = Seq Char
y1
createBWTMatrix :: String -> BWTMatrix
createBWTMatrix :: String -> BWTMatrix
createBWTMatrix String
t =
forall r e. Manifest r e => Comp -> [e] -> Vector r e
DMA.fromList (Word16 -> Comp
ParN Word16
0) [String]
zippedfff :: Array BN Ix1 String
where
zippedfff :: [String]
zippedfff = forall a b. (a -> b) -> [a] -> [b]
DL.map forall (t :: * -> *) a. Foldable t => t a -> [a]
DFold.toList forall a b. (a -> b) -> a -> b
$
forall a b. (a -> b) -> [a] -> [b]
DL.map (\(Seq Char
a,Seq Char
b) -> Seq Char
a forall a. Seq a -> Seq a -> Seq a
DS.>< Seq Char
b) forall a b. (a -> b) -> a -> b
$
forall (t :: * -> *) a. Foldable t => t a -> [a]
DFold.toList Seq (Seq Char, Seq Char)
zippedff
zippedff :: Seq (Seq Char, Seq Char)
zippedff = forall a. (a -> a -> Ordering) -> Seq a -> Seq a
DS.sortBy (\(Seq Char
a,Seq Char
_) (Seq Char
c,Seq Char
_) -> forall a. Ord a => a -> a -> Ordering
compare Seq Char
a Seq Char
c)
Seq (Seq Char, Seq Char)
zippedf
zippedf :: Seq (Seq Char, Seq Char)
zippedf = Seq (Seq Char, Seq Char)
zippedh
forall a. Seq a -> Seq a -> Seq a
DS.><
Seq (Seq Char, Seq Char)
zippedp
zippedh :: Seq (Seq Char, Seq Char)
zippedh = forall a. a -> Seq a
DS.singleton forall a b. (a -> b) -> a -> b
$
Seq (Seq Char) -> (Seq Char, Seq Char)
grabHeadChunks forall a b. (a -> b) -> a -> b
$
forall a. Int -> Seq a -> Seq (Seq a)
DS.chunksOf ((forall a. Seq a -> Int
DS.length Seq Char
tseq) forall a. Num a => a -> a -> a
- Int
1)
Seq Char
tseq
zippedp :: Seq (Seq Char, Seq Char)
zippedp = forall a b. Seq a -> Seq b -> Seq (a, b)
DS.zip Seq (Seq Char)
suffixesf Seq (Seq Char)
prefixesf
prefixesf :: Seq (Seq Char)
prefixesf = forall a. Int -> Seq a -> Seq a
DS.take ((forall a. Seq a -> Int
DS.length Seq (Seq Char)
prefixes) forall a. Num a => a -> a -> a
- Int
1)
Seq (Seq Char)
prefixes
suffixesf :: Seq (Seq Char)
suffixesf = forall a. Int -> Seq a -> Seq a
DS.drop Int
1
Seq (Seq Char)
suffixes
suffixes :: Seq (Seq Char)
suffixes = forall a. (a -> Bool) -> Seq a -> Seq a
DS.filter (Bool -> Bool
not forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. Seq a -> Bool
DS.null)
(forall a. Seq a -> Seq (Seq a)
DS.tails Seq Char
tseq)
prefixes :: Seq (Seq Char)
prefixes = forall a. (a -> Bool) -> Seq a -> Seq a
DS.filter (Bool -> Bool
not forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. Seq a -> Bool
DS.null)
(forall a. Seq a -> Seq (Seq a)
DS.inits Seq Char
tseq)
tseq :: Seq Char
tseq = (forall a. [a] -> Seq a
DS.fromList String
t) forall a. Seq a -> a -> Seq a
DS.|> Char
'$'