| Safe Haskell | None |
|---|---|
| Language | Haskell2010 |
Biobase.Fasta.Streaming
Description
Streaming Fasta handling via the streaming library.
The functions in here should stream in constant memory.
TODO: Verify with unit tests that this actually holds.
Synopsis
- data FindHeader
- = FindHeader {
- headerParts :: [ByteString]
- headerLength :: !Int
- }
- | HasHeader {
- header :: !ByteString
- dataOverlap :: !ByteString
- dataParts :: [ByteString]
- dataLength :: !Int
- entries :: !Int
- }
- data Current (which :: k) = Current {
- currentFasta :: ByteString
- currentStart :: Index 0
- }
- newtype Overlap (which :: k) = Overlap {}
- newtype Header (which :: k) = Header {}
- newtype CurrentSize = CurrentSize Int
- newtype OverlapSize = OverlapSize Int
- newtype HeaderSize = HeaderSize Int
- streamingFasta :: forall m w r a. Monad m => HeaderSize -> OverlapSize -> CurrentSize -> (Header w -> Overlap w -> Current w -> Stream (Of a) m ()) -> ByteString m r -> Stream (Of a) m r
- eachFasta :: Monad m => Header which1 -> Overlap which2 -> Current which3 -> Stream (Of (ByteString, ByteString, ByteString)) m ()
- parseFastaFile :: FilePath -> IO [Fasta]
- parseFasta :: ByteString -> [Fasta]
Documentation
data FindHeader Source #
Control structure for streamingFasta.
Constructors
| FindHeader | |
Fields
| |
| HasHeader | |
Fields
| |
data Current (which :: k) Source #
Current Fasta window, together with the start index (0-based).
Constructors
| Current | |
Fields
| |
Instances
| Eq (Current which) Source # | |
| Ord (Current which) Source # | |
Defined in Biobase.Fasta.Streaming Methods compare :: Current which -> Current which -> Ordering # (<) :: Current which -> Current which -> Bool # (<=) :: Current which -> Current which -> Bool # (>) :: Current which -> Current which -> Bool # (>=) :: Current which -> Current which -> Bool # | |
| Show (Current which) Source # | |
newtype Overlap (which :: k) Source #
Constructors
| Overlap | |
Fields | |
Instances
| Eq (Overlap which) Source # | |
| Ord (Overlap which) Source # | |
Defined in Biobase.Fasta.Streaming Methods compare :: Overlap which -> Overlap which -> Ordering # (<) :: Overlap which -> Overlap which -> Bool # (<=) :: Overlap which -> Overlap which -> Bool # (>) :: Overlap which -> Overlap which -> Bool # (>=) :: Overlap which -> Overlap which -> Bool # | |
| Show (Overlap which) Source # | |
newtype Header (which :: k) Source #
Constructors
| Header | |
Fields | |
Instances
| Eq (Header which) Source # | |
| Ord (Header which) Source # | |
Defined in Biobase.Fasta.Streaming | |
| Show (Header which) Source # | |
newtype CurrentSize Source #
Constructors
| CurrentSize Int |
Instances
| Eq CurrentSize Source # | |
Defined in Biobase.Fasta.Streaming | |
| Ord CurrentSize Source # | |
Defined in Biobase.Fasta.Streaming Methods compare :: CurrentSize -> CurrentSize -> Ordering # (<) :: CurrentSize -> CurrentSize -> Bool # (<=) :: CurrentSize -> CurrentSize -> Bool # (>) :: CurrentSize -> CurrentSize -> Bool # (>=) :: CurrentSize -> CurrentSize -> Bool # max :: CurrentSize -> CurrentSize -> CurrentSize # min :: CurrentSize -> CurrentSize -> CurrentSize # | |
| Show CurrentSize Source # | |
Defined in Biobase.Fasta.Streaming Methods showsPrec :: Int -> CurrentSize -> ShowS # show :: CurrentSize -> String # showList :: [CurrentSize] -> ShowS # | |
newtype OverlapSize Source #
Constructors
| OverlapSize Int |
Instances
| Eq OverlapSize Source # | |
Defined in Biobase.Fasta.Streaming | |
| Ord OverlapSize Source # | |
Defined in Biobase.Fasta.Streaming Methods compare :: OverlapSize -> OverlapSize -> Ordering # (<) :: OverlapSize -> OverlapSize -> Bool # (<=) :: OverlapSize -> OverlapSize -> Bool # (>) :: OverlapSize -> OverlapSize -> Bool # (>=) :: OverlapSize -> OverlapSize -> Bool # max :: OverlapSize -> OverlapSize -> OverlapSize # min :: OverlapSize -> OverlapSize -> OverlapSize # | |
| Show OverlapSize Source # | |
Defined in Biobase.Fasta.Streaming Methods showsPrec :: Int -> OverlapSize -> ShowS # show :: OverlapSize -> String # showList :: [OverlapSize] -> ShowS # | |
newtype HeaderSize Source #
Constructors
| HeaderSize Int |
Instances
| Eq HeaderSize Source # | |
Defined in Biobase.Fasta.Streaming | |
| Ord HeaderSize Source # | |
Defined in Biobase.Fasta.Streaming Methods compare :: HeaderSize -> HeaderSize -> Ordering # (<) :: HeaderSize -> HeaderSize -> Bool # (<=) :: HeaderSize -> HeaderSize -> Bool # (>) :: HeaderSize -> HeaderSize -> Bool # (>=) :: HeaderSize -> HeaderSize -> Bool # max :: HeaderSize -> HeaderSize -> HeaderSize # min :: HeaderSize -> HeaderSize -> HeaderSize # | |
| Show HeaderSize Source # | |
Defined in Biobase.Fasta.Streaming Methods showsPrec :: Int -> HeaderSize -> ShowS # show :: HeaderSize -> String # showList :: [HeaderSize] -> ShowS # | |
streamingFasta Source #
Arguments
| :: Monad m | |
| => HeaderSize | Maximal length of the header. Ok to set to […] (the suggested value is missing from this rendering). |
| -> OverlapSize | How much of the current window to carry over to the next step. Even if set
larger than the current size, at most the current size is carried over. (But see
the todo at […]; the referenced identifier is missing from this rendering.) |
| -> CurrentSize | The size of each window to be processed. |
| -> (Header w -> Overlap w -> Current w -> Stream (Of a) m ()) | The processing function. Takes in the header, any overlap from the
previous window, and the current window, and produces a stream of `a` values. |
| -> ByteString m r | A streaming bytestring of Fasta files. |
| -> Stream (Of a) m r | The outgoing stream of `a` values. |
Fully stream a Fasta file, making sure never to exceed a constant amount
of memory. The `go` function yields values of type `a` downstream for
continued streaming.
```haskell
r4 = toList . streamingFasta (HeaderSize 2) (OverlapSize 1) (CurrentSize 2) go . S8.fromStrict $ BS.pack t0
  where go (Header h) (Overlap o) (Current c) = yield (h,o,c)
```
eachFasta :: Monad m => Header which1 -> Overlap which2 -> Current which3 -> Stream (Of (ByteString, ByteString, ByteString)) m () Source #
parseFasta :: ByteString -> [Fasta] Source #