-- | Streaming Fasta handling via the @streaming@ library.
--
-- The functions in here should be streaming in constant memory.
--
-- A typical, slightly complicated use case is this:
-- @
--  forEach :: forall r . Stream (ByteString m) m r -> m (Stream (Of ()) m r)
--  forEach dna = do
--    -- extract the header, but at most 123 characters, dropping the rest
--    hdr SP.:> dta ← extractHeader (Just 123) dna
--    -- create windows @ws@ of a particular type. Include the prefix, the suffix, and make each window 10 characters long
--    let ws = (streamedWindows True True (Just 10) (SequenceIdentifier hdr) PlusStrand dta :: SP.Stream (SP.Of (BioSequenceWindow "DNA" DNA 0)) m r)
--    -- count the number of characters in @dna@, get the return value, print each window
--    count SP.:> r ← SP.mapM_ (liftIO . print) . bswSeqLength $ SP.copy ws
--    liftIO $ print count
--    liftIO $ putStrLn ""
--    -- yield one vacuous @()@ result, return the remainder @r@ from dna.
--    return $ SP.yield () *> return r
-- @
--
-- TODO Check if this is actually true with some unit tests.

module Biobase.Fasta.Streaming
  ( module Biobase.Fasta.Streaming
  ) where

import Control.Lens hiding (Index,Empty, mapped)
import Control.Monad
import Control.Monad.Trans.Resource (runResourceT, ResourceT(..), MonadResource)
import Data.Semigroup as SG
import Debug.Trace
import GHC.Generics (Generic)
import GHC.TypeLits
import Prelude as P
import qualified Data.ByteString.Char8 as BS
import qualified Streaming.Internal as SI
import Streaming as S
import Streaming.ByteString as BSS
import Streaming.ByteString.Char8 as S8
import Streaming.ByteString.Internal as SBI
import Streaming.Prelude as SP

import Data.ByteString.Streaming.Split

import Biobase.Types.BioSequence
import Biobase.Types.Index.Type
import Biobase.Types.Location
import Biobase.Types.Position
import Biobase.Types.Strand



-- | Split a raw byte stream into a stream of @Fasta@ entries, where every
-- entry has additionally been passed through 'collapseData'.
--
-- This is 'streamOfStreamedFasta' followed by 'collapseData' mapped over each
-- of the resulting entries.

streamedFasta :: (Monad m) => ByteStream m r -> Stream (Stream (ByteStream m) m) m r
{-# Inlinable streamedFasta #-}
streamedFasta = S.maps collapseData . streamOfStreamedFasta

-- | Here each individual fasta entry will be a stream.
--
-- The input is cut into lines with 'S8.lines'. Each time a line is available,
-- that line becomes the first element of a new entry, and 'splitFasta' is run
-- on the lines that follow it to decide where the entry ends. The stream
-- returned by 'splitFasta' (the lines after the entry) is processed in the
-- same way.
--
-- NOTE(review): because 'splitFasta' is started afresh on the lines /after/
-- the first line of an entry, the second non-empty line of an entry is always
-- kept in that entry, even if it starts with @>@ or @;@. Confirm whether this
-- is intended for entries without sequence data.
--
-- TODO Once this works, @streamingFasta@ should be @S.concats . streamOfStreamedFasta@ ...

streamOfStreamedFasta
  :: forall m r
  . ( Monad m )
  => ByteStream m r
  -- ^ raw input bytes
  -> Stream (Stream (ByteStream m) m) m r
  -- ^ outer stream with one inner stream of lines per entry
{-# Inlinable streamOfStreamedFasta #-}
streamOfStreamedFasta = go . S8.lines where
  go = \case
    SI.Return r -> SI.Return r
    SI.Effect m -> SI.Effect (fmap go m)
    -- @fs@ is the first line of the entry; its return value holds all
    -- following lines. The inner 'SI.Step' puts @fs@ at the front of the
    -- entry, the outer 'SI.Step' makes the entry an element of the result.
    SI.Step fs  -> SI.Step (SI.Step (fmap (fmap go . splitFasta) fs))

-- | Given a 'Stream (ByteStream m) m r' which is a 'Stream' of @lines@, split
-- off the first @Fasta@ entry.
--
-- The resulting stream yields lines until a line starting with @>@ or @;@ is
-- encountered /after/ at least one non-empty line has already been yielded.
-- That line, together with everything following it, is handed back as the
-- return value. The first non-empty line is always yielded, whatever its first
-- character is. Completely empty lines are dropped.

splitFasta :: (Monad m) => Stream (ByteStream m) m r -> Stream (ByteStream m) m (Stream (ByteStream m) m r)
{-# Inlinable splitFasta #-}
splitFasta = loop False where
  -- @hdr@ is 'True' as soon as one non-empty line has been yielded.
  loop hdr = \case
    SI.Return r -> SI.Return (SI.Return r)
    SI.Effect m -> SI.Effect (fmap (loop hdr) m)
    SI.Step bs  -> case bs of
      -- an empty line: skip it and continue with the following lines
      Empty r -> loop hdr r
      Chunk cs xs
        -- empty leading chunk: inspect the remainder of this line instead
        | BS.null cs -> loop hdr $ SI.Step xs
        -- the next entry starts here: stop, and return this line and the rest
        | hdr && (h == '>' || h == ';') -> SI.Return (SI.Step bs)
        -- yield the line and continue behind it
        | otherwise -> SI.Step $ fmap (loop True) bs
        -- only demanded in guards behind the 'BS.null' check
        where h = BS.head cs
      -- run the effect, then look at the line it produces
      Go m -> SI.Effect $ fmap (loop hdr . SI.Step) m

-- | Given a stream, roughly like @[BS "Header", BS "Data1", BS "Data2", ...]@
-- create a stream like @[BS "Header", BS "Data"]@.
--
-- Once a line starting with @>@ or @;@ is found, that line is yielded and all
-- remaining lines are concatenated (via 'S8.concat') into a single element. If
-- the first non-empty line is such a header line, the resulting stream
-- therefore holds exactly two elements. Non-empty lines in front of the first
-- header line are passed through unchanged; empty lines in front of it are
-- dropped. No further header check is done on the lines after the header.

collapseData :: (Monad m) => Stream (ByteStream m) m r -> Stream (ByteStream m) m r
{-# Inlinable collapseData #-}
collapseData = loop where
  loop = \case
    SI.Return r -> SI.Return r
    SI.Effect m -> SI.Effect (fmap loop m)
    SI.Step bs  -> case bs of
      -- an empty line: skip it
      Empty r -> loop r
      Chunk cs xs
        -- empty leading chunk: inspect the remainder of this line instead
        | BS.null cs -> loop $ SI.Step xs
        -- header line: yield it, then everything else as one element
        | h == '>' || h == ';' -> SI.Step $ fmap (S.yields . S8.concat) bs
        -- not a header line: pass it on and keep looking
        | otherwise -> SI.Step $ fmap loop bs
        -- only demanded in guards behind the 'BS.null' check
        where h = BS.head cs
      -- run the effect, then look at the line it produces
      Go m -> SI.Effect $ fmap (loop . SI.Step) m


-- | "Rechunk" a stream of bytestrings.
--
-- All elements of the input stream are concatenated with 'S8.concat', and the
-- result is split again with 'splitsByteStringAt'.
--
-- NOTE(review): presumably each resulting element is @n@ bytes long (the last
-- one possibly shorter); confirm against 'splitsByteStringAt'.

reChunkBS :: (Monad m) => Int -> Stream (ByteStream m) m r -> Stream (ByteStream m) m r
{-# Inlinable reChunkBS #-}
reChunkBS n = splitsByteStringAt n . S8.concat

-- | Assuming a "rechunked" stream of bytestrings, create sequence windows.
--
-- Every element of the input stream is made strict and wrapped into a
-- 'Location' carrying the given identifier and strand. The position of a
-- window is the summed length of all windows before it, so the first window
-- starts at @0@.

chunksToWindows :: Monad m => SequenceIdentifier w -> Strand -> Stream (ByteStream m) m r -> Stream (Of (Location w FwdPosition (BioSequence ty))) m r
{-# Inlinable chunksToWindows #-}
chunksToWindows seqId s
  = SP.map go
  -- 'SP.scan' first emits the seed state, which is not a window
  . SP.drop 1
  . SP.scan indexed (BS.empty, 0, 0) (\(bs,i,_) -> (bs,i))
  . S.mapsM S8.toStrict
  where
    -- state: (current chunk, start of current chunk, start of next chunk)
    indexed (_,_,next) bs = (bs, next, next + BS.length bs)
    go (bs,i)
      = Location
          { _locIdentifier = seqId
          , _locPosition   = FwdPosition s (Index i)
          , _locSequence   = BioSequence bs
          }



-- | Make it possible to take a fasta stream and produce a stream of sequence
-- windows. This is a convenience function around
-- 'attachSuffixes', 'attachPrefixes', 'pis', 'chunksToWindows', and either
-- 'reChunkBS' or 'collapseData', applied right to left in that order.
--
-- In case of a @Nothing@ window size, a single huge @Fasta@ entry is produced
-- (and materialized!).
--
-- TODO In case of @Nothing@ window size, we use the 'collapseData' function
-- which has one check too many, and will be slightly slower. However, the
-- check should be once per @ByteString@.

streamedWindows
  :: (Monad m)
  => Maybe Int
    -- ^ if @Just k@, 'attachPrefixes' is applied with @k@; otherwise no
    -- prefixes are attached.
  -> Maybe Int
    -- ^ if @Just k@, 'attachSuffixes' is applied with @k@; otherwise no
    -- suffixes are attached.
  -> Maybe Int
    -- ^ desired size or a single huge @Fasta@ entry.
  -> SequenceIdentifier w
    -- ^ identifier stored in every window
  -> Strand
    -- ^ strand stored in the position of every window
  -> (Stream (ByteStream m) m) r
--  -> Stream (Of (BioSequenceWindow w ty FwdLocation)) m r
  -> Stream (Of (PIS w FwdPosition (BioSequence ty))) m r
{-# Inlinable streamedWindows #-}
streamedWindows withPrefix withSuffix winSz seqId strnd
  = maybe id attachSuffixes withSuffix
  . maybe id attachPrefixes withPrefix
  . SP.map pis
  . chunksToWindows seqId strnd
  -- no window size: collapse the entry; otherwise rechunk to the given size
  . maybe collapseData reChunkBS winSz

-- | Get the full length of a stream of 'Location's, which is the sum of
-- 'locLength' over all elements of the stream.
--
-- To use, start with @streamLocationLength $ SP.copy xs@. Then consume this
-- stream normally. It still provides the stream of elements. However, the
-- return type is now not just @r@, but it provides @Int SP.:> r@, where the
-- @Int@ provides the total length.
--
-- This value may then be used to fully update negative strand information.

streamLocationLength :: (Monad m, ModifyLocation posTy seqTy) => Stream (Of (Location i posTy seqTy)) m r -> m (Of Int r)
{-# Inlinable streamLocationLength #-}
streamLocationLength = SP.fold addLength 0 id
  where
    -- add the length of one location to the running total
    addLength total loc = total + locLength loc

-- | As a first function, the header should be extracted from a @Fasta@ stream. Since headers may be
-- malformed / malicious, we make it possible to limit the size of the header.
--
-- The first element of the stream is taken to be the header and is returned
-- as a strict 'BS.ByteString', together with the remaining stream. With
-- @Just sz@, only the first @sz@ bytes of the header are kept, and the
-- remainder of the header is drained without being stored. With @Nothing@,
-- the complete header is materialized. The header is returned as found; no
-- leading character is removed.

extractHeader
  :: (Monad m)
  => Maybe Int
  -- ^ maximal number of header bytes to keep, or @Nothing@ for no limit
  -> Stream (ByteStream m) m r
  -- ^ a single entry, whose first element is the header
  -> m (Of BS.ByteString (Stream (ByteStream m) m r))
{-# Inlinable extractHeader #-}
extractHeader hdrSz = S8.toStrict . limit . S8.concat . S.splitsAt 1
  where
    -- cut the header down to the requested size, discarding whatever follows
    limit = case hdrSz of
      Nothing -> id
      Just sz -> S8.drained . S8.splitAt (fromIntegral sz)


{-
t0 = P.unlines
  [ ">Aaaa"
  , "123"
  , ">Bbbb"
  , "4567"
  , ">Cccc"
  , "890"
  ]


r4 = toList . streamingFasta (HeaderSize 2) (OverlapSize 1) (CurrentSize 2) . S8.fromStrict $ BS.pack t0
-}

{-
--eachFasta (Header h) (Overlap o) (Current c p) = SP.yield (h,o,c)
eachFasta (Header h) (Overlap o) (Current c p) = SP.yield (BS.length h, BS.length o, BS.length c)

--readFastaFile :: FilePath -> IO [(BS.ByteString,BS.ByteString,BS.ByteString)]
readFastaFile f = do
  let s = 1000000000000
  r ← runResourceT
          $ SP.mapM_ (liftIO . P.print)
          $ streamingFasta (HeaderSize s) (OverlapSize 0) (CurrentSize s) eachFasta
          $ S8.readFile f
  return r
-}

{-
readFastaFile f = do
  let s = 1000000000000
  r ← runResourceT
          $ SP.mapM_ (liftIO . P.print)
          $ SP.mapped S8.toStrict
          $ S8.split '>'
          $ S8.readFile f
  return r
-}