{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE MultiWayIf    #-}
{-# LANGUAGE ViewPatterns  #-}
{-# LANGUAGE Strict        #-}


-- |
-- Module      :  Data.BWT
-- Copyright   :  (c) Matthew Mosior 2022
-- License     :  BSD-style
-- Maintainer  :  mattm.github@gmail.com
-- Portability :  portable
--
-- = Burrows-Wheeler Transform (BWT)
-- 
-- The two functions that most users will utilize are 'toBWT' and 'fromBWT'.
-- There are auxilary function(s) inside of @"Data.BWT.Internal"@.
--
-- The helper functions for ByteString, 'bytestringToBWT', 'bytestringFromWord8BWT', 'bytestringFromByteStringBWT' and Text, 'textToBWT' and 'textFromBWT' should help for common use cases.
--
-- @"Data.BWT.Internal"@ also has the function 'createBWTMatrix', which can be useful as well, although not used by either 'toBWT' or 'fromBWT'.


module Data.BWT ( -- * To BWT functions
                  toBWT,
                  bytestringToBWT,
                  TextBWT(..),
                  textToBWT,
                  -- * From BWT functions
                  fromBWT,
                  bytestringFromWord8BWT,
                  bytestringFromByteStringBWT,
                  textFromBWT
                ) where

import Data.BWT.Internal

import Control.Monad()
import Control.Monad.ST as CMST
import Control.Monad.State.Strict()
import Data.ByteString as BS (ByteString,concat,pack,unpack)
import Data.Foldable as DFold (toList)
import Data.Sequence as DS (Seq(..),fromList,iterateN,length,unstableSortBy,zip)
import Data.STRef()
import Data.Text (Text)
import Data.Text.Encoding as DTE (decodeUtf8,encodeUtf8)
import Data.Word (Word8)
import GHC.Generics(Generic)


{-toBWT Function(s)-}

-- | Takes a String and returns the Burrows-Wheeler Transform (BWT).
-- Implemented via a 'SuffixArray'.
toBWT :: Ord a
      => [a]
      -> BWT a
toBWT :: forall a. Ord a => [a] -> BWT a
toBWT [] = forall a. Seq (Maybe a) -> BWT a
BWT forall a. Seq a
DS.Empty
toBWT [a]
xs = do
  let saxs :: SuffixArray a
saxs = forall a. Ord a => Seq a -> SuffixArray a
createSuffixArray Seq a
xss
  forall a. Seq (Maybe a) -> BWT a
BWT (forall a. SuffixArray a -> Seq a -> Seq (Maybe a)
saToBWT SuffixArray a
saxs
               Seq a
xss)
    where
      xss :: Seq a
xss = forall a. [a] -> Seq a
DS.fromList [a]
xs

-- | Helper function for converting a 'ByteString'
-- to a 'BWT' 'Word8'.
bytestringToBWT :: ByteString
                -> BWT Word8
bytestringToBWT :: ByteString -> BWT Word8
bytestringToBWT = forall a. Ord a => [a] -> BWT a
toBWT forall b c a. (b -> c) -> (a -> b) -> a -> c
. ByteString -> [Word8]
BS.unpack

-- | A newtype to ensure you only uncompress a 'BWT' created
-- from 'textToBWT', since ['Word8'] -> 'Text' is partial.
newtype TextBWT = TextBWT (BWT Word8)
  deriving (TextBWT -> TextBWT -> Bool
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: TextBWT -> TextBWT -> Bool
$c/= :: TextBWT -> TextBWT -> Bool
== :: TextBWT -> TextBWT -> Bool
$c== :: TextBWT -> TextBWT -> Bool
Eq,Eq TextBWT
TextBWT -> TextBWT -> Bool
TextBWT -> TextBWT -> Ordering
TextBWT -> TextBWT -> TextBWT
forall a.
Eq a
-> (a -> a -> Ordering)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> a)
-> (a -> a -> a)
-> Ord a
min :: TextBWT -> TextBWT -> TextBWT
$cmin :: TextBWT -> TextBWT -> TextBWT
max :: TextBWT -> TextBWT -> TextBWT
$cmax :: TextBWT -> TextBWT -> TextBWT
>= :: TextBWT -> TextBWT -> Bool
$c>= :: TextBWT -> TextBWT -> Bool
> :: TextBWT -> TextBWT -> Bool
$c> :: TextBWT -> TextBWT -> Bool
<= :: TextBWT -> TextBWT -> Bool
$c<= :: TextBWT -> TextBWT -> Bool
< :: TextBWT -> TextBWT -> Bool
$c< :: TextBWT -> TextBWT -> Bool
compare :: TextBWT -> TextBWT -> Ordering
$ccompare :: TextBWT -> TextBWT -> Ordering
Ord,Int -> TextBWT -> ShowS
[TextBWT] -> ShowS
TextBWT -> String
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [TextBWT] -> ShowS
$cshowList :: [TextBWT] -> ShowS
show :: TextBWT -> String
$cshow :: TextBWT -> String
showsPrec :: Int -> TextBWT -> ShowS
$cshowsPrec :: Int -> TextBWT -> ShowS
Show,ReadPrec [TextBWT]
ReadPrec TextBWT
Int -> ReadS TextBWT
ReadS [TextBWT]
forall a.
(Int -> ReadS a)
-> ReadS [a] -> ReadPrec a -> ReadPrec [a] -> Read a
readListPrec :: ReadPrec [TextBWT]
$creadListPrec :: ReadPrec [TextBWT]
readPrec :: ReadPrec TextBWT
$creadPrec :: ReadPrec TextBWT
readList :: ReadS [TextBWT]
$creadList :: ReadS [TextBWT]
readsPrec :: Int -> ReadS TextBWT
$creadsPrec :: Int -> ReadS TextBWT
Read,forall x. Rep TextBWT x -> TextBWT
forall x. TextBWT -> Rep TextBWT x
forall a.
(forall x. a -> Rep a x) -> (forall x. Rep a x -> a) -> Generic a
$cto :: forall x. Rep TextBWT x -> TextBWT
$cfrom :: forall x. TextBWT -> Rep TextBWT x
Generic)

-- | Helper function for converting 'Text'
-- to a 'TextBWT'.
textToBWT :: Text
          -> TextBWT
textToBWT :: Text -> TextBWT
textToBWT = BWT Word8 -> TextBWT
TextBWT forall b c a. (b -> c) -> (a -> b) -> a -> c
. ByteString -> BWT Word8
bytestringToBWT forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> ByteString
DTE.encodeUtf8

{-------------------}


{-fromBWT function(s)-}

-- | Takes a BWT data type (please see @"Data.BWT.Internal"@) and inverts it back to the original string.
-- 
-- This function utilizes the state monad (strict) in order
-- to implement the [Magic](https://www.youtube.com/watch?v=QwSsppKrCj4) Inverse BWT algorithm by backtracking
-- indices starting with the (__Nothing__,_) entry.
fromBWT :: Ord a
        => BWT a
        -> [a]
fromBWT :: forall a. Ord a => BWT a -> [a]
fromBWT BWT a
bwt = do
  let originall :: BWTSeq a
originall = forall a. (forall s. ST s a) -> a
CMST.runST forall a b. (a -> b) -> a -> b
$ forall a s. Seq (Maybe a, Int) -> ST s (BWTSeq a)
magicInverseBWT Seq (Maybe a, Int)
magicsz
  forall (t :: * -> *) a. Foldable t => t a -> [a]
DFold.toList BWTSeq a
originall
    where
      magicsz :: Seq (Maybe a, Int)
magicsz = forall a. (a -> a -> Ordering) -> Seq a -> Seq a
DS.unstableSortBy (\(Maybe a
a,Int
b) (Maybe a
c,Int
d) -> forall a1 a2. (Ord a1, Ord a2) => (a1, a2) -> (a1, a2) -> Ordering
sortTB (Maybe a
a,Int
b) (Maybe a
c,Int
d))
                Seq (Maybe a, Int)
zipped
      zipped :: Seq (Maybe a, Int)
zipped  = forall a b. Seq a -> Seq b -> Seq (a, b)
DS.zip Seq (Maybe a)
bwtt
                       (forall a. Int -> (a -> a) -> a -> Seq a
DS.iterateN (forall a. Seq a -> Int
DS.length Seq (Maybe a)
bwtt) (forall a. Num a => a -> a -> a
+Int
1) Int
0)
      bwtt :: Seq (Maybe a)
bwtt    = (\(BWT Seq (Maybe a)
t) -> Seq (Maybe a)
t) BWT a
bwt

-- | Helper function for converting a 'BWT' of 'Word8's
-- to a 'ByteString'.
bytestringFromWord8BWT :: BWT Word8
                       -> ByteString
bytestringFromWord8BWT :: BWT Word8 -> ByteString
bytestringFromWord8BWT = [Word8] -> ByteString
BS.pack forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. Ord a => BWT a -> [a]
fromBWT

-- | Helper function for converting a 'BWT' 'ByteString's
-- to a 'ByteString'.
bytestringFromByteStringBWT :: BWT ByteString
                            -> ByteString
bytestringFromByteStringBWT :: BWT ByteString -> ByteString
bytestringFromByteStringBWT = [ByteString] -> ByteString
BS.concat forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. Ord a => BWT a -> [a]
fromBWT

-- | Helper function for converting 'TextBWT'
-- to a 'Text'
textFromBWT :: TextBWT
            -> Text
textFromBWT :: TextBWT -> Text
textFromBWT (TextBWT BWT Word8
x) = ByteString -> Text
DTE.decodeUtf8 forall a b. (a -> b) -> a -> b
$
                          BWT Word8 -> ByteString
bytestringFromWord8BWT BWT Word8
x

{---------------------}