{-# LANGUAGE DeriveGeneric     #-}
{-# LANGUAGE MultiWayIf        #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE ViewPatterns      #-}
{-# LANGUAGE Strict            #-}


-- |
-- Module      :  Data.FMIndex
-- Copyright   :  (c) Matthew Mosior 2022
-- License     :  BSD-style
-- Maintainer  :  mattm.github@gmail.com
-- Portability :  portable
--
-- = Full-text Minute-space index (FM-index)
--
-- Users will get the most mileage by first compressing to a 'BWT'
-- on the initial 'ByteString' or 'Text' input before compressing to
-- a 'FMIndexB' or 'FMIndexT'.
--
-- To do this, users can use the 'bytestringToBWTToFMIndexB' and 'bytestringToBWTToFMIndexT' functions,
-- as well as the 'textToBWTToFMIndexB' and 'textToBWTToFMIndexT' functions.
--
-- = Operation: Count
--
-- The count operation is supported by both sequential ('bytestringFMIndexCountS' and 'textFMIndexCountS')
-- and parallel ('bytestringFMIndexCountP' and 'textFMIndexCountP') implementations.
--
-- The count operations on 'ByteString', 'bytestringFMIndexCountS' and 'bytestringFMIndexCountP', are implemented using the 'countFMIndexB' function.
--
-- The count operations on 'Text', 'textFMIndexCountS' and 'textFMIndexCountP', are implemented using the 'countFMIndexT' function.
--
-- = Operation: Locate
--
-- The locate operation is supported by both sequential ('bytestringFMIndexLocateS' and 'textFMIndexLocateS')
-- and parallel ('bytestringFMIndexLocateP' and 'textFMIndexLocateP') implementations.
--
-- The locate operations on 'ByteString', 'bytestringFMIndexLocateS' and 'bytestringFMIndexLocateP', are implemented using the 'locateFMIndexB' function.
--
-- The locate operations on 'Text', 'textFMIndexLocateS' and 'textFMIndexLocateP', are implemented using the 'locateFMIndexT' function.
--
-- = Internal
--
-- @"Data.FMIndex.Internal"@ contains efficient and stateful implementations of the FMIndex and Inverse FMIndex algorithms.


module Data.FMIndex ( -- * To FMIndex functions
                      bytestringToBWTToFMIndexB,
                      bytestringToBWTToFMIndexT,
                      textToBWTToFMIndexB,
                      textToBWTToFMIndexT,
                      textBWTToFMIndexB,
                      bytestringBWTToFMIndexB,
                      textBWTToFMIndexT,
                      bytestringBWTToFMIndexT,    
                      -- * From FMIndex functions
                      bytestringFromBWTFromFMIndexB,
                      bytestringFromBWTFromFMIndexT,
                      textFromBWTFromFMIndexB,
                      textFromBWTFromFMIndexT,
                      textBWTFromFMIndexT,
                      bytestringBWTFromFMIndexT,
                      textBWTFromFMIndexB,
                      bytestringBWTFromFMIndexB,
                      textFromFMIndexB,
                      bytestringFromFMIndexB,
                      textFromFMIndexT,
                      bytestringFromFMIndexT,
                      -- * Count operations
                      bytestringFMIndexCountS,
                      textFMIndexCountS,
                      bytestringFMIndexCountP,
                      textFMIndexCountP,
                      -- * Locate operations
                      bytestringFMIndexLocateS,
                      textFMIndexLocateS,
                      bytestringFMIndexLocateP,
                      textFMIndexLocateP
                    ) where

import Data.BWT
import Data.BWT.Internal
import Data.FMIndex.Internal

import Control.Concurrent as CC (getNumCapabilities)
import Control.Monad()
import Control.Monad.ST as CMST
import Control.Monad.State.Strict()
import Control.Parallel.Strategies as CPS
import Data.ByteString as BS
import Data.ByteString.Char8 as BSC8 (singleton,uncons,unpack)
import Data.Char()
import Data.Foldable()
import Data.Maybe as DMaybe (isNothing,fromJust)
import Data.Sequence as DS (Seq(..),ViewL(..),fromList,index,viewl,(<|))
import Data.STRef()
import Data.Text as DText
import Data.Text.Encoding as DTE (decodeUtf8,encodeUtf8)
import Data.Word (Word8)
import Prelude as P


{-toFMIndex Function(s)-}

-- | Convert a 'ByteString' into an 'FMIndexB'.
--
-- The input is unpacked into 'Word8's to build its 'BWTMatrix'
-- ('createBWTMatrix') and, separately, transformed with
-- 'bytestringToBWT'; both results are handed to
-- 'bytestringBWTToFMIndexB'.
bytestringToBWTToFMIndexB :: ByteString ->
                             FMIndexB
bytestringToBWTToFMIndexB xs =
  bytestringBWTToFMIndexB (createBWTMatrix (BS.unpack xs))
                          (bytestringToBWT xs)

-- | Convert a 'ByteString' into an 'FMIndexT'.
--
-- The input is unpacked into 'Word8's to build its 'BWTMatrix'
-- ('createBWTMatrix') and, separately, transformed with
-- 'bytestringToBWT'; both results are handed to
-- 'bytestringBWTToFMIndexT'.
bytestringToBWTToFMIndexT :: ByteString ->
                             FMIndexT
bytestringToBWTToFMIndexT xs =
  bytestringBWTToFMIndexT (createBWTMatrix (BS.unpack xs))
                          (bytestringToBWT xs)

-- | Convert a 'Text' into an 'FMIndexB'.
--
-- The 'BWTMatrix' is built ('createBWTMatrix') from the bytes of the
-- UTF-8 encoding of the input, the 'TextBWT' comes from 'textToBWT',
-- and both are handed to 'textBWTToFMIndexB'.
textToBWTToFMIndexB :: Text ->
                       FMIndexB
textToBWTToFMIndexB xs =
  textBWTToFMIndexB (createBWTMatrix (BS.unpack (DTE.encodeUtf8 xs)))
                    (textToBWT xs)

-- | Convert a 'Text' into an 'FMIndexT'.
--
-- The 'BWTMatrix' is built ('createBWTMatrix') from the bytes of the
-- UTF-8 encoding of the input, the 'TextBWT' comes from 'textToBWT',
-- and both are handed to 'textBWTToFMIndexT'.
textToBWTToFMIndexT :: Text ->
                       FMIndexT
textToBWTToFMIndexT xs =
  textBWTToFMIndexT (createBWTMatrix (BS.unpack (DTE.encodeUtf8 xs)))
                    (textToBWT xs)

-- | Build an 'FMIndexB' from a 'BWTMatrix' of 'Word8's and the
-- corresponding 'TextBWT'.
--
-- An empty matrix produces an index whose 'CcB', 'OccCKB' and 'SAB'
-- components are all empty.  Otherwise:
--
-- * 'CcB' is computed by 'seqToCcB' from the first element of every
--   matrix row;
-- * 'OccCKB' is computed by 'seqToOccCKB' from the sequence wrapped by
--   the 'TextBWT';
-- * 'SAB' is the 'createSuffixArray' of the text recovered with
--   'textFromBWT', one single-character 'ByteString' per 'Char'.
textBWTToFMIndexB :: BWTMatrix Word8
                  -> TextBWT
                  -> FMIndexB
textBWTToFMIndexB (BWTMatrix DS.Empty) _  =
  FMIndexB (CcB DS.Empty,OccCKB DS.Empty,SAB DS.Empty)
textBWTToFMIndexB (BWTMatrix rows)     xs@(TextBWT (BWT bwtElems)) =
  let occckb = CMST.runST (seqToOccCKB (toByteStrings bwtElems))
      ccb    = CMST.runST (seqToCcB (toByteStrings rowHeads))
      -- NOTE(review): 'BSC8.singleton' keeps only the low 8 bits of a
      -- 'Char', so non-Latin-1 characters are truncated here while the
      -- other two components work on UTF-8 bytes -- confirm that inputs
      -- are expected to be ASCII/Latin-1 only.
      sab    = createSuffixArray
             . fmap BSC8.singleton
             . DS.fromList
             . DText.unpack
             $ textFromBWT xs
  in  FMIndexB (CcB ccb,OccCKB occckb,SAB sab)
  where
    -- First element of every matrix row ('Nothing' for an empty row).
    rowHeads = fmap rowHead rows
    rowHead row = case viewl row of
                    DS.EmptyL   -> Nothing
                    (h DS.:< _) -> h
    -- Wrap every present byte in a one-byte 'ByteString'.
    toByteStrings s = fmap (fmap BS.singleton) s

-- | Build an 'FMIndexB' from a 'BWTMatrix' of 'Word8's and the
-- corresponding 'BWT' of 'Word8's.
--
-- An empty matrix produces an index whose 'CcB', 'OccCKB' and 'SAB'
-- components are all empty.  Otherwise:
--
-- * 'CcB' is computed by 'seqToCcB' from the first element of every
--   matrix row;
-- * 'OccCKB' is computed by 'seqToOccCKB' from the sequence wrapped by
--   the 'BWT';
-- * 'SAB' is the 'createSuffixArray' of the bytes recovered with
--   'bytestringFromWord8BWT', one single-byte 'ByteString' per byte.
bytestringBWTToFMIndexB :: BWTMatrix Word8
                        -> BWT Word8
                        -> FMIndexB
bytestringBWTToFMIndexB (BWTMatrix DS.Empty) _  =
  FMIndexB (CcB DS.Empty,OccCKB DS.Empty,SAB DS.Empty)
bytestringBWTToFMIndexB (BWTMatrix rows)     xs@(BWT bwtElems) =
  let occckb = CMST.runST (seqToOccCKB (toByteStrings bwtElems))
      ccb    = CMST.runST (seqToCcB (toByteStrings rowHeads))
      sab    = createSuffixArray
             . fmap BS.singleton
             . DS.fromList
             . BS.unpack
             $ bytestringFromWord8BWT xs
  in  FMIndexB (CcB ccb,OccCKB occckb,SAB sab)
  where
    -- First element of every matrix row ('Nothing' for an empty row).
    rowHeads = fmap rowHead rows
    rowHead row = case viewl row of
                    DS.EmptyL   -> Nothing
                    (h DS.:< _) -> h
    -- Wrap every present byte in a one-byte 'ByteString'.
    toByteStrings s = fmap (fmap BS.singleton) s

-- | Build an 'FMIndexT' from a 'BWTMatrix' of 'Word8's and the
-- corresponding 'TextBWT'.
--
-- An empty matrix produces an index whose 'CcT', 'OccCKT' and 'SAT'
-- components are all empty.  Otherwise:
--
-- * 'CcT' is computed by 'seqToCcT' from the first element of every
--   matrix row;
-- * 'OccCKT' is computed by 'seqToOccCKT' from the sequence wrapped by
--   the 'TextBWT';
-- * 'SAT' is the 'createSuffixArray' of the text recovered with
--   'textFromBWT', one single-character 'Text' per 'Char'.
textBWTToFMIndexT :: BWTMatrix Word8
                  -> TextBWT
                  -> FMIndexT
textBWTToFMIndexT (BWTMatrix DS.Empty) _  =
  FMIndexT (CcT DS.Empty,OccCKT DS.Empty,SAT DS.Empty)
textBWTToFMIndexT (BWTMatrix rows)     xs@(TextBWT (BWT bwtElems)) =
  let occckt = CMST.runST (seqToOccCKT (toTexts bwtElems))
      cct    = CMST.runST (seqToCcT (toTexts rowHeads))
      sat    = createSuffixArray
             . fmap DText.singleton
             . DS.fromList
             . DText.unpack
             $ textFromBWT xs
  in  FMIndexT (CcT cct,OccCKT occckt,SAT sat)
  where
    -- First element of every matrix row ('Nothing' for an empty row).
    rowHeads = fmap rowHead rows
    rowHead row = case viewl row of
                    DS.EmptyL   -> Nothing
                    (h DS.:< _) -> h
    -- Decode every present byte, on its own, as a 'Text'.
    -- NOTE(review): a single byte >= 0x80 is not valid UTF-8 by itself,
    -- and 'DTE.decodeUtf8' fails on invalid input -- confirm that inputs
    -- are expected to be ASCII only.
    toTexts s = fmap (fmap (DTE.decodeUtf8 . BS.singleton)) s

-- | Build an 'FMIndexT' from a 'BWTMatrix' of 'Word8's and the
-- corresponding 'BWT' of 'Word8's.
--
-- An empty matrix produces an index whose 'CcT', 'OccCKT' and 'SAT'
-- components are all empty.  Otherwise:
--
-- * 'CcT' is computed by 'seqToCcT' from the first element of every
--   matrix row;
-- * 'OccCKT' is computed by 'seqToOccCKT' from the sequence wrapped by
--   the 'BWT';
-- * 'SAT' is the 'createSuffixArray' of the UTF-8 decoding of the bytes
--   recovered with 'bytestringFromWord8BWT', one single-character
--   'Text' per 'Char'.
bytestringBWTToFMIndexT :: BWTMatrix Word8
                        -> BWT Word8
                        -> FMIndexT
bytestringBWTToFMIndexT (BWTMatrix DS.Empty) _  =
  FMIndexT (CcT DS.Empty,OccCKT DS.Empty,SAT DS.Empty)
bytestringBWTToFMIndexT (BWTMatrix rows)     xs@(BWT bwtElems) =
  let occckt = CMST.runST (seqToOccCKT (toTexts bwtElems))
      cct    = CMST.runST (seqToCcT (toTexts rowHeads))
      sat    = createSuffixArray
             . fmap DText.singleton
             . DS.fromList
             . DText.unpack
             . DTE.decodeUtf8
             $ bytestringFromWord8BWT xs
  in  FMIndexT (CcT cct,OccCKT occckt,SAT sat)
  where
    -- First element of every matrix row ('Nothing' for an empty row).
    rowHeads = fmap rowHead rows
    rowHead row = case viewl row of
                    DS.EmptyL   -> Nothing
                    (h DS.:< _) -> h
    -- Decode every present byte, on its own, as a 'Text'.
    -- NOTE(review): a single byte >= 0x80 is not valid UTF-8 by itself,
    -- and 'DTE.decodeUtf8' fails on invalid input -- confirm that inputs
    -- are expected to be ASCII only.
    toTexts s = fmap (fmap (DTE.decodeUtf8 . BS.singleton)) s

{-----------------------}


{-fromFMIndex function(s)-}

-- | Recover a 'ByteString' from an 'FMIndexB'.
--
-- The index is first turned into a 'BWT' of 'ByteString's with
-- 'bytestringBWTFromFMIndexB'; that 'BWT' is then converted to a
-- 'ByteString' by 'bytestringFromByteStringBWT'.
bytestringFromBWTFromFMIndexB :: FMIndexB
                              -> ByteString
bytestringFromBWTFromFMIndexB =
  bytestringFromByteStringBWT . bytestringBWTFromFMIndexB

-- | Recover a 'ByteString' from an 'FMIndexT'.
--
-- The index is first turned into a 'BWT' of UTF-8 encoded
-- 'ByteString's with 'bytestringBWTFromFMIndexT'; that 'BWT' is then
-- converted to a 'ByteString' by 'bytestringFromByteStringBWT'.
bytestringFromBWTFromFMIndexT :: FMIndexT
                              -> ByteString
bytestringFromBWTFromFMIndexT xs =
  -- 'bytestringBWTFromFMIndexT' performs exactly the "take the Text BWT
  -- and UTF-8 encode every present element" step, so it is reused here
  -- instead of repeating the per-element conversion.
  bytestringFromByteStringBWT (bytestringBWTFromFMIndexT xs)

-- | Recover a 'Text' from an 'FMIndexB'.
--
-- The index is turned into a 'BWT' of 'ByteString's
-- ('bytestringBWTFromFMIndexB'), converted to a 'ByteString'
-- ('bytestringFromByteStringBWT') and finally decoded as UTF-8.
textFromBWTFromFMIndexB :: FMIndexB
                        -> Text
textFromBWTFromFMIndexB =
    DTE.decodeUtf8
  . bytestringFromByteStringBWT
  . bytestringBWTFromFMIndexB

-- | Recover a 'Text' from an 'FMIndexT'.
--
-- The index is turned into a 'BWT' of 'ByteString's
-- ('bytestringBWTFromFMIndexT'), converted to a 'ByteString'
-- ('bytestringFromByteStringBWT') and finally decoded as UTF-8.
textFromBWTFromFMIndexT :: FMIndexT
                        -> Text
textFromBWTFromFMIndexT =
    DTE.decodeUtf8
  . bytestringFromByteStringBWT
  . bytestringBWTFromFMIndexT

-- | Turn an 'FMIndexT' into a 'BWT' of 'Text's.
--
-- If any of the three index components ('CcT', 'OccCKT' or 'SAT') is
-- empty the result is an empty 'BWT'; otherwise the 'BWT' wraps the
-- sequence returned by 'seqFromFMIndexT'.
textBWTFromFMIndexT :: FMIndexT
                    -> BWT Text
textBWTFromFMIndexT (FMIndexT (CcT DS.Empty,_,_))    = BWT DS.Empty
textBWTFromFMIndexT (FMIndexT (_,OccCKT DS.Empty,_)) = BWT DS.Empty
textBWTFromFMIndexT (FMIndexT (_,_,SAT DS.Empty))    = BWT DS.Empty
textBWTFromFMIndexT xs                               =
  BWT (seqFromFMIndexT xs)

-- | Turn an 'FMIndexT' into a 'BWT' of 'ByteString's.
--
-- If any of the three index components ('CcT', 'OccCKT' or 'SAT') is
-- empty the result is an empty 'BWT'; otherwise the 'BWT' wraps the
-- sequence returned by 'seqFromFMIndexT' with every present element
-- UTF-8 encoded ('Nothing' elements are kept as 'Nothing').
bytestringBWTFromFMIndexT :: FMIndexT
                          -> BWT ByteString
bytestringBWTFromFMIndexT (FMIndexT (CcT DS.Empty,_,_))    = BWT DS.Empty
bytestringBWTFromFMIndexT (FMIndexT (_,OccCKT DS.Empty,_)) = BWT DS.Empty
bytestringBWTFromFMIndexT (FMIndexT (_,_,SAT DS.Empty))    = BWT DS.Empty
bytestringBWTFromFMIndexT xs                               =
  BWT (fmap (fmap DTE.encodeUtf8) (seqFromFMIndexT xs))

-- | Turn an 'FMIndexB' into a 'BWT' of 'Text's.
--
-- If any of the three index components ('CcB', 'OccCKB' or 'SAB') is
-- empty the result is an empty 'BWT'; otherwise the 'BWT' wraps the
-- sequence returned by 'seqFromFMIndexB' with every present element
-- decoded as UTF-8 ('Nothing' elements are kept as 'Nothing').
textBWTFromFMIndexB :: FMIndexB
                    -> BWT Text
textBWTFromFMIndexB (FMIndexB (CcB DS.Empty,_,_))    = BWT DS.Empty
textBWTFromFMIndexB (FMIndexB (_,OccCKB DS.Empty,_)) = BWT DS.Empty
textBWTFromFMIndexB (FMIndexB (_,_,SAB DS.Empty))    = BWT DS.Empty
textBWTFromFMIndexB xs                               =
  BWT (fmap (fmap DTE.decodeUtf8) (seqFromFMIndexB xs))

-- | Turn an 'FMIndexB' into a 'BWT' of 'ByteString's.
--
-- If any of the three index components ('CcB', 'OccCKB' or 'SAB') is
-- empty the result is an empty 'BWT'; otherwise the 'BWT' wraps the
-- sequence returned by 'seqFromFMIndexB'.
bytestringBWTFromFMIndexB :: FMIndexB
                          -> BWT ByteString
bytestringBWTFromFMIndexB (FMIndexB (CcB DS.Empty,_,_))    = BWT DS.Empty
bytestringBWTFromFMIndexB (FMIndexB (_,OccCKB DS.Empty,_)) = BWT DS.Empty
bytestringBWTFromFMIndexB (FMIndexB (_,_,SAB DS.Empty))    = BWT DS.Empty
bytestringBWTFromFMIndexB xs                               =
  BWT (seqFromFMIndexB xs)

-- | Turn an 'FMIndexB' into a 'Seq' of optional 'Text's.
--
-- If any of the three index components ('CcB', 'OccCKB' or 'SAB') is
-- empty the result is the empty sequence; otherwise it is the sequence
-- returned by 'seqFromFMIndexB' with every present element decoded as
-- UTF-8 ('Nothing' elements are kept as 'Nothing').
textFromFMIndexB :: FMIndexB
                 -> Seq (Maybe Text)
textFromFMIndexB (FMIndexB (CcB DS.Empty,_,_))    = DS.Empty
textFromFMIndexB (FMIndexB (_,OccCKB DS.Empty,_)) = DS.Empty
textFromFMIndexB (FMIndexB (_,_,SAB DS.Empty))    = DS.Empty
textFromFMIndexB xs                               =
  fmap (fmap DTE.decodeUtf8) (seqFromFMIndexB xs)

-- | Turn an 'FMIndexB' into a 'Seq' of optional 'ByteString's.
--
-- If any of the three index components ('CcB', 'OccCKB' or 'SAB') is
-- empty the result is the empty sequence; otherwise it is the sequence
-- returned by 'seqFromFMIndexB'.
bytestringFromFMIndexB :: FMIndexB
                       -> Seq (Maybe ByteString)
bytestringFromFMIndexB (FMIndexB (CcB DS.Empty,_,_))    = DS.Empty
bytestringFromFMIndexB (FMIndexB (_,OccCKB DS.Empty,_)) = DS.Empty
bytestringFromFMIndexB (FMIndexB (_,_,SAB DS.Empty))    = DS.Empty
bytestringFromFMIndexB xs                               =
  seqFromFMIndexB xs

-- | Turn an 'FMIndexT' into a 'Seq' of optional 'Text's.
--
-- If any of the three index components ('CcT', 'OccCKT' or 'SAT') is
-- empty the result is the empty sequence; otherwise it is the sequence
-- returned by 'seqFromFMIndexT'.
textFromFMIndexT :: FMIndexT
                 -> Seq (Maybe Text)
textFromFMIndexT (FMIndexT (CcT DS.Empty,_,_))    = DS.Empty
textFromFMIndexT (FMIndexT (_,OccCKT DS.Empty,_)) = DS.Empty
textFromFMIndexT (FMIndexT (_,_,SAT DS.Empty))    = DS.Empty
textFromFMIndexT xs                               =
  seqFromFMIndexT xs

-- | Takes a 'FMIndexT' and returns
-- the original 'Seq' of 'ByteString's.
--
-- An index whose 'CcT', 'OccCKT' or 'SAT' component
-- is empty yields an empty 'Seq'. Otherwise the 'Text'
-- elements recovered by 'seqFromFMIndexT' are UTF-8 encoded,
-- and 'Nothing' elements are passed through unchanged.
bytestringFromFMIndexT :: FMIndexT
                       -> Seq (Maybe ByteString)
bytestringFromFMIndexT (FMIndexT (CcT DS.Empty,_,_))    = DS.Empty
bytestringFromFMIndexT (FMIndexT (_,OccCKT DS.Empty,_)) = DS.Empty
bytestringFromFMIndexT (FMIndexT (_,_,SAT DS.Empty))    = DS.Empty
bytestringFromFMIndexT xs                               =
  -- Outer fmap walks the Seq, inner fmap maps under the Maybe,
  -- so no partial 'fromJust' is needed.
  fmap (fmap DTE.encodeUtf8) (seqFromFMIndexT xs)

{-------------------------}


{-Count operations.-}

-- | Takes a list of pattern(s) of 'ByteString's
-- and an input 'ByteString'
-- and returns the number of occurrences of the pattern(s)
-- in the input 'ByteString'.
--
-- Each pattern is paired with its count, in the order the
-- patterns were given. An empty pattern list or an empty
-- input yields an empty 'Seq'.
bytestringFMIndexCountS :: [ByteString]
                        -> ByteString
                        -> Seq (ByteString,CIntB)
bytestringFMIndexCountS []      _                        = DS.Empty
bytestringFMIndexCountS _       (BSC8.uncons -> Nothing) = DS.Empty
bytestringFMIndexCountS allpats input                    =
  -- The index is built once from the input and shared by every pattern.
  countEach (bytestringToBWTToFMIndexB input) allpats
    where
      countEach _    []         = DS.Empty
      countEach bfmi (pat:pats) =
        -- 'countFMIndexB' receives the pattern as a Seq of
        -- one-character ByteStrings.
        let symbols = fmap BSC8.singleton (DS.fromList (BSC8.unpack pat))
            hits    = runST (countFMIndexB symbols bfmi)
        in (pat,hits) DS.<| countEach bfmi pats

-- | Takes a list of pattern(s) of 'Text's
-- and an input 'Text'
-- and returns the number of occurrences of the pattern(s)
-- in the input 'Text'.
--
-- Each pattern is paired with its count, in the order the
-- patterns were given. An empty pattern list or an empty
-- input yields an empty 'Seq'.
textFMIndexCountS :: [Text]
                  -> Text
                  -> Seq (Text,CIntT)
textFMIndexCountS []      _     = DS.Empty
textFMIndexCountS _       ""    = DS.Empty
textFMIndexCountS allpats input =
  -- The index is built once from the input and shared by every pattern.
  countEach (textToBWTToFMIndexT input) allpats
    where
      countEach _    []         = DS.Empty
      countEach tfmi (pat:pats) =
        -- 'countFMIndexT' receives the pattern as a Seq of
        -- one-character Texts.
        let symbols = fmap DText.singleton (DS.fromList (DText.unpack pat))
            hits    = runST (countFMIndexT symbols tfmi)
        in (pat,hits) DS.<| countEach tfmi pats

-- | Takes a list of pattern(s) of 'ByteString's
-- and an input 'ByteString'
-- and returns the number of occurrences of the pattern(s)
-- in the input 'ByteString'.
-- Parallelized and utilizes chunking
-- based on the number of available cores.
-- When using, compile with: -O2 -threaded -with-rtsopts=-N.
--
-- Each pattern is paired with its count, in the order the
-- patterns were given. An empty pattern list or an empty
-- input yields an empty 'Seq'.
bytestringFMIndexCountP :: [ByteString]
                        -> ByteString
                        -> IO (Seq (ByteString,CIntB))
bytestringFMIndexCountP []      _                        = return DS.Empty
bytestringFMIndexCountP _       (BSC8.uncons -> Nothing) = return DS.Empty
bytestringFMIndexCountP allpats input                    = do
  numcores <- CC.getNumCapabilities
  -- Integer division: this is 0 when there are fewer patterns than
  -- capabilities. NOTE(review): the parallel package documents chunk
  -- sizes of 1 or less as equivalent to parList - confirm for the
  -- pinned version.
  let chunksize = P.length allpats `div` numcores
      bfmindex  = bytestringToBWTToFMIndexB input
      -- Build the result list with fmap so that walking its spine does
      -- no counting; the work lives in the element thunks, which the
      -- strategy evaluates.
      bcount    = (fmap (countOne bfmindex) allpats)
                  `CPS.using`
                  (CPS.parListChunk chunksize CPS.rseq)
  return (DS.fromList bcount)
    where
      -- Pair one pattern with its count. The count is forced before the
      -- pair is returned, so evaluating the pair to WHNF (which is all
      -- 'CPS.rseq' does) performs the actual search.
      countOne bfmi pat =
        let symbols = fmap BSC8.singleton (DS.fromList (BSC8.unpack pat))
            hits    = runST (countFMIndexB symbols bfmi)
        in hits `seq` (pat,hits)

-- | Takes a list of pattern(s) of 'Text's
-- and an input 'Text'
-- and returns the number of occurrences of the pattern(s)
-- in the input 'Text'.
-- Parallelized and utilizes chunking
-- based on the number of available cores.
-- When using, compile with: -O2 -threaded -with-rtsopts=-N.
--
-- Each pattern is paired with its count, in the order the
-- patterns were given. An empty pattern list or an empty
-- input yields an empty 'Seq'.
textFMIndexCountP :: [Text]
                  -> Text
                  -> IO (Seq (Text,CIntT))
textFMIndexCountP []      _     = return DS.Empty
textFMIndexCountP _       ""    = return DS.Empty
textFMIndexCountP allpats input = do
  numcores <- CC.getNumCapabilities
  -- Integer division: this is 0 when there are fewer patterns than
  -- capabilities. NOTE(review): the parallel package documents chunk
  -- sizes of 1 or less as equivalent to parList - confirm for the
  -- pinned version.
  let chunksize = P.length allpats `div` numcores
      tfmindex  = textToBWTToFMIndexT input
      -- Build the result list with fmap so that walking its spine does
      -- no counting; the work lives in the element thunks, which the
      -- strategy evaluates.
      tcount    = (fmap (countOne tfmindex) allpats)
                  `CPS.using`
                  (CPS.parListChunk chunksize CPS.rseq)
  return (DS.fromList tcount)
    where
      -- Pair one pattern with its count. The count is forced before the
      -- pair is returned, so evaluating the pair to WHNF (which is all
      -- 'CPS.rseq' does) performs the actual search.
      countOne tfmi pat =
        let symbols = fmap DText.singleton (DS.fromList (DText.unpack pat))
            hits    = runST (countFMIndexT symbols tfmi)
        in hits `seq` (pat,hits)

{-------------------}


{-Locate operations.-}

-- | Takes a list of pattern(s) of 'ByteString's
-- and an input 'ByteString'
-- and returns the indices of occurrences of the pattern(s)
-- in the input 'ByteString'.
-- The output indices are __1__-based,
-- and are __not__ sorted.
--
-- Each pattern is paired with its indices, in the order the
-- patterns were given. An empty pattern list or an empty
-- input yields an empty 'Seq'.
bytestringFMIndexLocateS :: [ByteString]
                         -> ByteString
                         -> Seq (ByteString,LIntB)
bytestringFMIndexLocateS []      _                        = DS.Empty
bytestringFMIndexLocateS _       (BSC8.uncons -> Nothing) = DS.Empty
bytestringFMIndexLocateS allpats input                    =
  -- The index is built once from the input and shared by every pattern.
  let bfmindex = bytestringToBWTToFMIndexB input
  in locateEach (suffixArrayOf bfmindex) bfmindex allpats
    where
      -- Unwrap the suffix array held in the third component of the index.
      suffixArrayOf (FMIndexB (_,_,SAB sa)) = sa
      locateEach _   _    []         = DS.Empty
      locateEach bsa bfmi (pat:pats) =
        -- 'locateFMIndexB' receives the pattern as a Seq of
        -- one-character ByteStrings.
        let symbols = fmap BSC8.singleton (DS.fromList (BSC8.unpack pat))
            hits    = runST (locateFMIndexB symbols bfmi)
            -- Every 'Just' hit is decremented by one, used to index the
            -- suffix array, and replaced by that suffix's start position;
            -- 'Nothing' hits are passed through unchanged.
            starts  = fmap (fmap (\hit -> suffixstartpos (DS.index bsa (hit - 1)))) hits
        in (pat,starts) DS.<| locateEach bsa bfmi pats

-- | Takes a list of pattern(s) of 'Text's
-- and an input 'Text'
-- and returns the indices of occurrences of the pattern(s)
-- in the input 'Text'.
-- The output indices are __1__-based,
-- and are __not__ sorted.
--
-- Each pattern is paired with its indices, in the order the
-- patterns were given. An empty pattern list or an empty
-- input yields an empty 'Seq'.
textFMIndexLocateS :: [Text]
                   -> Text
                   -> Seq (Text,LIntT)
textFMIndexLocateS []      _     = DS.Empty
textFMIndexLocateS _       ""    = DS.Empty
textFMIndexLocateS allpats input =
  -- The index is built once from the input and shared by every pattern.
  let tfmindex = textToBWTToFMIndexT input
  in locateEach (suffixArrayOf tfmindex) tfmindex allpats
    where
      -- Unwrap the suffix array held in the third component of the index.
      suffixArrayOf (FMIndexT (_,_,SAT sa)) = sa
      locateEach _   _    []         = DS.Empty
      locateEach tsa tfmi (pat:pats) =
        -- 'locateFMIndexT' receives the pattern as a Seq of
        -- one-character Texts.
        let symbols = fmap DText.singleton (DS.fromList (DText.unpack pat))
            hits    = runST (locateFMIndexT symbols tfmi)
            -- Every 'Just' hit is decremented by one, used to index the
            -- suffix array, and replaced by that suffix's start position;
            -- 'Nothing' hits are passed through unchanged.
            starts  = fmap (fmap (\hit -> suffixstartpos (DS.index tsa (hit - 1)))) hits
        in (pat,starts) DS.<| locateEach tsa tfmi pats

-- | Takes a list of pattern(s) of 'ByteString's
-- and an input 'ByteString'
-- and returns the indices of occurrences of the pattern(s)
-- in the input 'ByteString'.
-- The output indices are __1__-based,
-- and are __not__ sorted.
-- Parallelized and utilizes chunking
-- based on the number of available cores.
-- When using, compile with: -O2 -threaded -with-rtsopts=-N.
--
-- Each pattern is paired with its indices, in the order the
-- patterns were given. An empty pattern list or an empty
-- input yields an empty 'Seq'.
bytestringFMIndexLocateP :: [ByteString]
                         -> ByteString
                         -> IO (Seq (ByteString,LIntB))
bytestringFMIndexLocateP []      _                        = return DS.Empty
bytestringFMIndexLocateP _       (BSC8.uncons -> Nothing) = return DS.Empty
bytestringFMIndexLocateP allpats input                    = do
  numcores <- CC.getNumCapabilities
  -- Integer division: this is 0 when there are fewer patterns than
  -- capabilities. NOTE(review): the parallel package documents chunk
  -- sizes of 1 or less as equivalent to parList - confirm for the
  -- pinned version.
  let chunksize    = P.length allpats `div` numcores
      bfmindex     = bytestringToBWTToFMIndexB input
      bytestringsa = suffixArrayOf bfmindex
      -- Build the result list with fmap so that walking its spine does
      -- no searching; the work lives in the element thunks, which the
      -- strategy evaluates.
      blocate      = (fmap (locateOne bytestringsa bfmindex) allpats)
                     `CPS.using`
                     (CPS.parListChunk chunksize CPS.rseq)
  return (DS.fromList blocate)
    where
      -- Unwrap the suffix array held in the third component of the index.
      suffixArrayOf (FMIndexB (_,_,SAB sa)) = sa
      -- Pair one pattern with its start positions. The mapped Seq is
      -- forced before the pair is returned, which runs the locate query,
      -- so evaluating the pair to WHNF (which is all 'CPS.rseq' does)
      -- performs the actual search.
      locateOne bsa bfmi pat =
        let symbols = fmap BSC8.singleton (DS.fromList (BSC8.unpack pat))
            hits    = runST (locateFMIndexB symbols bfmi)
            -- Every 'Just' hit is decremented by one, used to index the
            -- suffix array, and replaced by that suffix's start position;
            -- 'Nothing' hits are passed through unchanged.
            starts  = fmap (fmap (\hit -> suffixstartpos (DS.index bsa (hit - 1)))) hits
        in starts `seq` (pat,starts)

-- | Takes a list of pattern(s) of 'Text's
-- and an input 'Text'
-- and returns the indices of occurrences of the pattern(s)
-- in the input 'Text'.
-- The output indices are __1__-based,
-- and are __not__ sorted.
-- Parallelized and utilizes chunking
-- based on the number of available cores.
-- When using, compile with: -O2 -threaded -with-rtsopts=-N.
--
-- Each pattern is paired with its indices, in the order the
-- patterns were given. An empty pattern list or an empty
-- input yields an empty 'Seq'.
textFMIndexLocateP :: [Text]
                   -> Text
                   -> IO (Seq (Text,LIntT))
textFMIndexLocateP []      _     = return DS.Empty
textFMIndexLocateP _       ""    = return DS.Empty
textFMIndexLocateP allpats input = do
  numcores <- CC.getNumCapabilities
  -- Integer division: this is 0 when there are fewer patterns than
  -- capabilities. NOTE(review): the parallel package documents chunk
  -- sizes of 1 or less as equivalent to parList - confirm for the
  -- pinned version.
  let chunksize = P.length allpats `div` numcores
      tfmindex  = textToBWTToFMIndexT input
      textsa    = suffixArrayOf tfmindex
      -- Build the result list with fmap so that walking its spine does
      -- no searching; the work lives in the element thunks, which the
      -- strategy evaluates.
      tlocate   = (fmap (locateOne textsa tfmindex) allpats)
                  `CPS.using`
                  (CPS.parListChunk chunksize CPS.rseq)
  return (DS.fromList tlocate)
    where
      -- Unwrap the suffix array held in the third component of the index.
      suffixArrayOf (FMIndexT (_,_,SAT sa)) = sa
      -- Pair one pattern with its start positions. The mapped Seq is
      -- forced before the pair is returned, which runs the locate query,
      -- so evaluating the pair to WHNF (which is all 'CPS.rseq' does)
      -- performs the actual search.
      locateOne tsa tfmi pat =
        let symbols = fmap DText.singleton (DS.fromList (DText.unpack pat))
            hits    = runST (locateFMIndexT symbols tfmi)
            -- Every 'Just' hit is decremented by one, used to index the
            -- suffix array, and replaced by that suffix's start position;
            -- 'Nothing' hits are passed through unchanged.
            starts  = fmap (fmap (\hit -> suffixstartpos (DS.index tsa (hit - 1)))) hits
        in starts `seq` (pat,starts)

{--------------------}