BiobaseFasta-0.2.0.0: streaming FASTA parser

Safe HaskellNone
LanguageHaskell2010

Biobase.Fasta.Streaming

Description

Streaming Fasta handling via the streaming library.

The functions in this module are intended to stream in constant memory.

TODO: Add unit tests to check that the functions in this module actually run in constant memory.

Synopsis

Documentation

data FindHeader Source #

Control structure for streamingFasta.

Constructors

FindHeader 

Fields

HasHeader 

Fields

data Current (which :: k) Source #

Current Fasta window, together with the start index (0-based).

Constructors

Current 
Instances
Eq (Current which) Source # 
Instance details

Defined in Biobase.Fasta.Streaming

Methods

(==) :: Current which -> Current which -> Bool #

(/=) :: Current which -> Current which -> Bool #

Ord (Current which) Source # 
Instance details

Defined in Biobase.Fasta.Streaming

Methods

compare :: Current which -> Current which -> Ordering #

(<) :: Current which -> Current which -> Bool #

(<=) :: Current which -> Current which -> Bool #

(>) :: Current which -> Current which -> Bool #

(>=) :: Current which -> Current which -> Bool #

max :: Current which -> Current which -> Current which #

min :: Current which -> Current which -> Current which #

Show (Current which) Source # 
Instance details

Defined in Biobase.Fasta.Streaming

Methods

showsPrec :: Int -> Current which -> ShowS #

show :: Current which -> String #

showList :: [Current which] -> ShowS #

newtype Overlap (which :: k) Source #

Constructors

Overlap 
Instances
Eq (Overlap which) Source # 
Instance details

Defined in Biobase.Fasta.Streaming

Methods

(==) :: Overlap which -> Overlap which -> Bool #

(/=) :: Overlap which -> Overlap which -> Bool #

Ord (Overlap which) Source # 
Instance details

Defined in Biobase.Fasta.Streaming

Methods

compare :: Overlap which -> Overlap which -> Ordering #

(<) :: Overlap which -> Overlap which -> Bool #

(<=) :: Overlap which -> Overlap which -> Bool #

(>) :: Overlap which -> Overlap which -> Bool #

(>=) :: Overlap which -> Overlap which -> Bool #

max :: Overlap which -> Overlap which -> Overlap which #

min :: Overlap which -> Overlap which -> Overlap which #

Show (Overlap which) Source # 
Instance details

Defined in Biobase.Fasta.Streaming

Methods

showsPrec :: Int -> Overlap which -> ShowS #

show :: Overlap which -> String #

showList :: [Overlap which] -> ShowS #

newtype Header (which :: k) Source #

Constructors

Header 
Instances
Eq (Header which) Source # 
Instance details

Defined in Biobase.Fasta.Streaming

Methods

(==) :: Header which -> Header which -> Bool #

(/=) :: Header which -> Header which -> Bool #

Ord (Header which) Source # 
Instance details

Defined in Biobase.Fasta.Streaming

Methods

compare :: Header which -> Header which -> Ordering #

(<) :: Header which -> Header which -> Bool #

(<=) :: Header which -> Header which -> Bool #

(>) :: Header which -> Header which -> Bool #

(>=) :: Header which -> Header which -> Bool #

max :: Header which -> Header which -> Header which #

min :: Header which -> Header which -> Header which #

Show (Header which) Source # 
Instance details

Defined in Biobase.Fasta.Streaming

Methods

showsPrec :: Int -> Header which -> ShowS #

show :: Header which -> String #

showList :: [Header which] -> ShowS #

streamingFasta Source #

Arguments

:: Monad m 
=> HeaderSize

Maximal length of the header. It is fine to set this to 20 000; the limit only guards against an extremely long header line.

-> OverlapSize

How much of the current window to carry over to the next step. Even if set larger than the current window size, the overlap will be at most the current window size. (But see the TODO at overlappedFasta.)

-> CurrentSize

The size of each window to be processed.

-> (Header w -> Overlap w -> Current w -> Stream (Of a) m ())

The processing function. It takes the header, any overlap from the previous window, and the current window, and produces a stream of values of type a.

-> ByteString m r

A streaming bytestring of Fasta files.

-> Stream (Of a) m r

The outgoing stream of processed values of type a.

Fully stream a Fasta file, making sure never to exceed a constant amount of memory. The processing function (named go in the example below) yields values of type a downstream for continued streaming.

r4 = toList . streamingFasta (HeaderSize 2) (OverlapSize 1) (CurrentSize 2) go . S8.fromStrict $ BS.pack t0
 where go (Header h) (Overlap o) (Current c) = yield (h,o,c)

eachFasta :: Monad m => Header which1 -> Overlap which2 -> Current which3 -> Stream (Of (ByteString, ByteString, ByteString)) m () Source #