Safe Haskell | None |
---|---|
Language | Haskell2010 |
Streaming Fasta handling via the `streaming` library.
The functions in here should be streaming in constant memory.
TODO Check if this is actually true with some unit tests.
Synopsis
- data FindHeader
- = FindHeader {
- headerParts :: [ByteString]
- headerLength :: !Int
- | HasHeader {
- header :: !ByteString
- dataOverlap :: !ByteString
- dataParts :: [ByteString]
- dataLength :: !Int
- entries :: !Int
- = FindHeader {
- data Current (which :: k) = Current {
- currentFasta :: ByteString
- currentStart :: Index 0
- newtype Overlap (which :: k) = Overlap {}
- newtype Header (which :: k) = Header {}
- newtype CurrentSize = CurrentSize Int
- newtype OverlapSize = OverlapSize Int
- newtype HeaderSize = HeaderSize Int
- streamingFasta :: forall m w r a. Monad m => HeaderSize -> OverlapSize -> CurrentSize -> (Header w -> Overlap w -> Current w -> Stream (Of a) m ()) -> ByteString m r -> Stream (Of a) m r
- eachFasta :: Monad m => Header which1 -> Overlap which2 -> Current which3 -> Stream (Of (ByteString, ByteString, ByteString)) m ()
- parseFastaFile :: FilePath -> IO [Fasta]
- parseFasta :: ByteString -> [Fasta]
Documentation
data FindHeader Source #
Control structure for `streamingFasta`.
FindHeader | |
| |
HasHeader | |
|
data Current (which :: k) Source #
Current Fasta window, together with the start index (0-based).
Current | |
|
Instances
Eq (Current which) Source # | |
Ord (Current which) Source # | |
Defined in Biobase.Fasta.Streaming compare :: Current which -> Current which -> Ordering # (<) :: Current which -> Current which -> Bool # (<=) :: Current which -> Current which -> Bool # (>) :: Current which -> Current which -> Bool # (>=) :: Current which -> Current which -> Bool # | |
Show (Current which) Source # | |
newtype Overlap (which :: k) Source #
Instances
Eq (Overlap which) Source # | |
Ord (Overlap which) Source # | |
Defined in Biobase.Fasta.Streaming compare :: Overlap which -> Overlap which -> Ordering # (<) :: Overlap which -> Overlap which -> Bool # (<=) :: Overlap which -> Overlap which -> Bool # (>) :: Overlap which -> Overlap which -> Bool # (>=) :: Overlap which -> Overlap which -> Bool # | |
Show (Overlap which) Source # | |
newtype Header (which :: k) Source #
Instances
Eq (Header which) Source # | |
Ord (Header which) Source # | |
Defined in Biobase.Fasta.Streaming | |
Show (Header which) Source # | |
newtype CurrentSize Source #
Instances
Eq CurrentSize Source # | |
Defined in Biobase.Fasta.Streaming (==) :: CurrentSize -> CurrentSize -> Bool # (/=) :: CurrentSize -> CurrentSize -> Bool # | |
Ord CurrentSize Source # | |
Defined in Biobase.Fasta.Streaming compare :: CurrentSize -> CurrentSize -> Ordering # (<) :: CurrentSize -> CurrentSize -> Bool # (<=) :: CurrentSize -> CurrentSize -> Bool # (>) :: CurrentSize -> CurrentSize -> Bool # (>=) :: CurrentSize -> CurrentSize -> Bool # max :: CurrentSize -> CurrentSize -> CurrentSize # min :: CurrentSize -> CurrentSize -> CurrentSize # | |
Show CurrentSize Source # | |
Defined in Biobase.Fasta.Streaming showsPrec :: Int -> CurrentSize -> ShowS # show :: CurrentSize -> String # showList :: [CurrentSize] -> ShowS # |
newtype OverlapSize Source #
Instances
Eq OverlapSize Source # | |
Defined in Biobase.Fasta.Streaming (==) :: OverlapSize -> OverlapSize -> Bool # (/=) :: OverlapSize -> OverlapSize -> Bool # | |
Ord OverlapSize Source # | |
Defined in Biobase.Fasta.Streaming compare :: OverlapSize -> OverlapSize -> Ordering # (<) :: OverlapSize -> OverlapSize -> Bool # (<=) :: OverlapSize -> OverlapSize -> Bool # (>) :: OverlapSize -> OverlapSize -> Bool # (>=) :: OverlapSize -> OverlapSize -> Bool # max :: OverlapSize -> OverlapSize -> OverlapSize # min :: OverlapSize -> OverlapSize -> OverlapSize # | |
Show OverlapSize Source # | |
Defined in Biobase.Fasta.Streaming showsPrec :: Int -> OverlapSize -> ShowS # show :: OverlapSize -> String # showList :: [OverlapSize] -> ShowS # |
newtype HeaderSize Source #
Instances
Eq HeaderSize Source # | |
Defined in Biobase.Fasta.Streaming (==) :: HeaderSize -> HeaderSize -> Bool # (/=) :: HeaderSize -> HeaderSize -> Bool # | |
Ord HeaderSize Source # | |
Defined in Biobase.Fasta.Streaming compare :: HeaderSize -> HeaderSize -> Ordering # (<) :: HeaderSize -> HeaderSize -> Bool # (<=) :: HeaderSize -> HeaderSize -> Bool # (>) :: HeaderSize -> HeaderSize -> Bool # (>=) :: HeaderSize -> HeaderSize -> Bool # max :: HeaderSize -> HeaderSize -> HeaderSize # min :: HeaderSize -> HeaderSize -> HeaderSize # | |
Show HeaderSize Source # | |
Defined in Biobase.Fasta.Streaming showsPrec :: Int -> HeaderSize -> ShowS # show :: HeaderSize -> String # showList :: [HeaderSize] -> ShowS # |
:: Monad m | |
=> HeaderSize | Maximal length of the header. Ok to set to |
-> OverlapSize | How much of the current size to carry over to the next step. Even if set
larger than current size, it will only be at most current size. (But see
todo at |
-> CurrentSize | The size of each window to be processed. |
-> (Header w -> Overlap w -> Current w -> Stream (Of a) m ()) | The processing function. Takes in the header, any overlap from the
previous window, and the current window, and produces a stream of `a` values. |
-> ByteString m r | A streaming bytestring of Fasta files. |
-> Stream (Of a) m r | The outgoing stream of `a` values. |
Fully stream a fasta file, making sure to never exceed a constant amount
of memory. The `go` function yields values of type `a` down the line for
continued streaming.
r4 = toList . streamingFasta (HeaderSize 2) (OverlapSize 1) (CurrentSize 2) go . S8.fromStrict $ BS.pack t0 where go (Header h) (Overlap o) (Current c) = yield (h,o,c)
eachFasta :: Monad m => Header which1 -> Overlap which2 -> Current which3 -> Stream (Of (ByteString, ByteString, ByteString)) m () Source #
parseFasta :: ByteString -> [Fasta] Source #