Safe Haskell | None |
---|---|
Language | Haskell2010 |
Streaming Fasta handling via the `streaming` library.
The functions in here are intended to stream in constant memory.
TODO: Check with unit tests whether this actually holds.
Synopsis
- data FindHeader
    - = FindHeader {
        - headerParts :: [ByteString]
        - headerLength :: !Int
        - }
    - | HasHeader {
        - fhHeader :: !ByteString
        - dataOverlap :: !ByteString
        - dataParts :: [ByteString]
        - dataLength :: !Int
        - entries :: !Int
        - }
- newtype CurrentSize = CurrentSize Int
- newtype OverlapSize = OverlapSize Int
- newtype HeaderSize = HeaderSize Int
- fastaUid :: Lens' (SequenceIdentifier w) ByteString
- streamingFasta :: forall m w ty k r a. Monad m => HeaderSize -> OverlapSize -> CurrentSize -> ByteString m r -> Stream (Of (BioSequenceWindow w ty k)) m r
Documentation
data FindHeader Source #
Control structure for `streamingFasta`.
FindHeader | |
| |
HasHeader | |
|
newtype CurrentSize Source #
Instances
Eq CurrentSize Source # | |
Defined in Biobase.Fasta.Streaming (==) :: CurrentSize -> CurrentSize -> Bool # (/=) :: CurrentSize -> CurrentSize -> Bool # | |
Ord CurrentSize Source # | |
Defined in Biobase.Fasta.Streaming compare :: CurrentSize -> CurrentSize -> Ordering # (<) :: CurrentSize -> CurrentSize -> Bool # (<=) :: CurrentSize -> CurrentSize -> Bool # (>) :: CurrentSize -> CurrentSize -> Bool # (>=) :: CurrentSize -> CurrentSize -> Bool # max :: CurrentSize -> CurrentSize -> CurrentSize # min :: CurrentSize -> CurrentSize -> CurrentSize # | |
Show CurrentSize Source # | |
Defined in Biobase.Fasta.Streaming showsPrec :: Int -> CurrentSize -> ShowS # show :: CurrentSize -> String # showList :: [CurrentSize] -> ShowS # |
newtype OverlapSize Source #
Instances
Eq OverlapSize Source # | |
Defined in Biobase.Fasta.Streaming (==) :: OverlapSize -> OverlapSize -> Bool # (/=) :: OverlapSize -> OverlapSize -> Bool # | |
Ord OverlapSize Source # | |
Defined in Biobase.Fasta.Streaming compare :: OverlapSize -> OverlapSize -> Ordering # (<) :: OverlapSize -> OverlapSize -> Bool # (<=) :: OverlapSize -> OverlapSize -> Bool # (>) :: OverlapSize -> OverlapSize -> Bool # (>=) :: OverlapSize -> OverlapSize -> Bool # max :: OverlapSize -> OverlapSize -> OverlapSize # min :: OverlapSize -> OverlapSize -> OverlapSize # | |
Show OverlapSize Source # | |
Defined in Biobase.Fasta.Streaming showsPrec :: Int -> OverlapSize -> ShowS # show :: OverlapSize -> String # showList :: [OverlapSize] -> ShowS # |
newtype HeaderSize Source #
Instances
Eq HeaderSize Source # | |
Defined in Biobase.Fasta.Streaming (==) :: HeaderSize -> HeaderSize -> Bool # (/=) :: HeaderSize -> HeaderSize -> Bool # | |
Ord HeaderSize Source # | |
Defined in Biobase.Fasta.Streaming compare :: HeaderSize -> HeaderSize -> Ordering # (<) :: HeaderSize -> HeaderSize -> Bool # (<=) :: HeaderSize -> HeaderSize -> Bool # (>) :: HeaderSize -> HeaderSize -> Bool # (>=) :: HeaderSize -> HeaderSize -> Bool # max :: HeaderSize -> HeaderSize -> HeaderSize # min :: HeaderSize -> HeaderSize -> HeaderSize # | |
Show HeaderSize Source # | |
Defined in Biobase.Fasta.Streaming showsPrec :: Int -> HeaderSize -> ShowS # show :: HeaderSize -> String # showList :: [HeaderSize] -> ShowS # |
fastaUid :: Lens' (SequenceIdentifier w) ByteString Source #
Lens into the unique id (the first word) of the header.
streamingFasta Source #
:: Monad m | |
=> HeaderSize | Maximal length of the header. Ok to set to |
-> OverlapSize | How much of the current size to carry over to the next step. Even if set
larger than current size, it will only be at most current size. (But see
todo at |
-> CurrentSize | The size of each window to be processed. |
-> ByteString m r | A streaming bytestring of Fasta files. |
-> Stream (Of (BioSequenceWindow w ty k)) m r | The outgoing stream of |
Fully stream a fasta file, making sure to never exceed a constant amount
of memory. The `go` function yields values of type `a` down the line for
continued streaming.
r4 = toList . streamingFasta (HeaderSize 2) (OverlapSize 1) (CurrentSize 2) go . S8.fromStrict $ BS.pack t0 where go (Header h) (Overlap o) (Current c) = yield (h,o,c)