{-# LANGUAGE MultiWayIf   #-}
{-# LANGUAGE ViewPatterns #-}
{-# LANGUAGE Strict       #-}


-- |
-- Module      :  Data.BWT
-- Copyright   :  (c) Matthew Mosior 2022
-- License     :  BSD-style
-- Maintainer  :  mattm.github@gmail.com
-- Portability :  portable
--
-- = Burrows-Wheeler Transform (BWT)
-- 
-- The two functions that most users will utilize are 'toBWT' and 'fromBWT'.
-- There are auxilary function(s) inside of @"Data.BWT.Internal"@.
--
-- @"Data.BWT.Internal"@ also has the function 'createBWTMatrix', which can be useful as well, although not used by either 'toBWT' or 'fromBWT'.


module Data.BWT where

import Data.BWT.Internal

import Control.Monad()
import Control.Monad.ST as CMST
import Control.Monad.State.Strict()
import Data.Foldable as DFold (toList)
import Data.Sequence as DS
import Data.STRef()


{-toBWT function(s).-}

-- | Takes a String and returns the Burrows-Wheeler Transform (BWT).
-- Implemented via a 'SuffixArray'.
--
-- Works with alphanumeric characters (A-Za-z0-9), as well as special characters `~?!@#%^&*()_+<>';:[]{}/\|"-.,
-- Does __NOT__ work with an input containing the __$__ character.
-- 
-- Appends the __$__ character to the input automatically.
toBWT :: String -> BWT
toBWT :: String -> BWT
toBWT [] = forall a. Seq a
DS.Empty
toBWT String
xs = do
  let saxs :: SuffixArray
saxs = BWT -> SuffixArray
createSuffixArray BWT
xseos
  SuffixArray -> BWT -> BWT
saToBWT SuffixArray
saxs
          BWT
xseos
    where
      xseos :: BWT
xseos = (forall a. [a] -> Seq a
DS.fromList String
xs) forall a. Seq a -> a -> Seq a
DS.|> Char
'$'

{--------------------}


{-fromBWT function(s).-}

-- | Takes a BWT data type (please see @"Data.BWT.Internal"@) and inverts it back to the original string.
-- 
-- This function utilizes the state monad (strict) in order
-- to implement the [Magic](https://www.youtube.com/watch?v=QwSsppKrCj4) Inverse BWT algorithm by backtracking
-- indices starting with the __$__ character.
fromBWT :: BWT -> String
fromBWT :: BWT -> String
fromBWT BWT
bwt = do
  let originalp :: BWT
originalp = forall a. (forall s. ST s a) -> a
CMST.runST forall a b. (a -> b) -> a -> b
$ forall s. Seq (Char, Int) -> ST s BWT
magicInverseBWT Seq (Char, Int)
magicsz
  forall (t :: * -> *) a. Foldable t => t a -> [a]
DFold.toList forall a b. (a -> b) -> a -> b
$ forall a. Int -> Seq a -> Seq a
DS.take ((forall a. Seq a -> Int
DS.length BWT
originalp) forall a. Num a => a -> a -> a
- Int
1)
                         BWT
originalp
    where
      magicsz :: Seq (Char, Int)
magicsz = forall a. (a -> a -> Ordering) -> Seq a -> Seq a
DS.sortBy (\(Char
a,Int
b) (Char
c,Int
d) -> forall a1 a2. (Ord a1, Ord a2) => (a1, a2) -> (a1, a2) -> Ordering
sortTB (Char
a,Int
b) (Char
c,Int
d))
                Seq (Char, Int)
zipped
      zipped :: Seq (Char, Int)
zipped  = forall a b. Seq a -> Seq b -> Seq (a, b)
DS.zip BWT
bwt
                       (forall a. Int -> (a -> a) -> a -> Seq a
DS.iterateN (forall a. Seq a -> Int
DS.length BWT
bwt) (forall a. Num a => a -> a -> a
+Int
1) Int
0)

{----------------------}