-- Hoogle documentation, generated by Haddock -- See Hoogle, http://www.haskell.org/hoogle/ -- | streaming FASTA parser -- -- Stream-based handling of FASTA files. The user selects a window size, -- the library then handles the window. For each window, the previous -- (past) window is available, in case some data sits on the boundary -- between windows. -- -- Greg Schwartz' http://hackage.haskell.org/package/fasta package -- is a lot more complete. This one is mostly tailored to my usage -- requirements (and may at some point use his library). @package BiobaseFasta @version 0.3.0.0 -- | Streaming Fasta handling via the streaming library. -- -- The functions in here should be streaming in constant memory. -- -- TODO Check if this is actually true with some unit tests. module Biobase.Fasta.Streaming -- | Control structure for streamingFasta. data FindHeader FindHeader :: [ByteString] -> !Int -> FindHeader -- | the collected header parts (in reverse order) [headerParts] :: FindHeader -> [ByteString] -- | accumulated header length [headerLength] :: FindHeader -> !Int HasHeader :: !ByteString -> !ByteString -> [ByteString] -> !Int -> !Int -> FindHeader -- | the (size-truncated) header for this fasta file [fhHeader] :: FindHeader -> !ByteString -- | overlap (if any) from earlier parts of the fasta file [dataOverlap] :: FindHeader -> !ByteString -- | collection of dataParts, in reverse order! [dataParts] :: FindHeader -> [ByteString] -- | total length of data parts, simplifies checking if enough data was -- collected [dataLength] :: FindHeader -> !Int -- | count how many entries we have seen [entries] :: FindHeader -> !Int newtype CurrentSize CurrentSize :: Int -> CurrentSize newtype OverlapSize OverlapSize :: Int -> OverlapSize newtype HeaderSize HeaderSize :: Int -> HeaderSize -- | lens into the unique id / first word of the header. 
fastaUid :: Lens' (SequenceIdentifier w) ByteString -- | Fully stream a fasta file, making sure to never exceed a constant -- amount of memory. The go function yields values of type -- a down the line for continued streaming. -- --
-- r4 = toList . streamingFasta (HeaderSize 2) (OverlapSize 1) (CurrentSize 2) go . S8.fromStrict $ BS.pack t0 -- where go (Header h) (Overlap o) (Current c) = yield (h,o,c) --streamingFasta :: forall m w ty k r a. Monad m => HeaderSize -> OverlapSize -> CurrentSize -> ByteString m r -> Stream (Of (BioSequenceWindow w ty k)) m r instance GHC.Show.Show Biobase.Fasta.Streaming.CurrentSize instance GHC.Classes.Ord Biobase.Fasta.Streaming.CurrentSize instance GHC.Classes.Eq Biobase.Fasta.Streaming.CurrentSize instance GHC.Show.Show Biobase.Fasta.Streaming.OverlapSize instance GHC.Classes.Ord Biobase.Fasta.Streaming.OverlapSize instance GHC.Classes.Eq Biobase.Fasta.Streaming.OverlapSize instance GHC.Show.Show Biobase.Fasta.Streaming.HeaderSize instance GHC.Classes.Ord Biobase.Fasta.Streaming.HeaderSize instance GHC.Classes.Eq Biobase.Fasta.Streaming.HeaderSize -- | A convenience module for *small* Fasta entries, that are -- completely in memory and *not* to be streamed. -- -- The Data.ByteString.Strict.Lens module is very helpful for -- further handling of Fasta entries. -- -- For convenience, the convertString function from -- string-conversions is supplied. module Biobase.Fasta.Strict -- | A *strict* Fasta entry. data Fasta which ty Fasta :: !SequenceIdentifier which -> !BioSequence ty -> Fasta which ty [_header] :: Fasta which ty -> !SequenceIdentifier which [_fasta] :: Fasta which ty -> !BioSequence ty -- | If you don't want to deal with the phantom types. type FastaUntyped = Fasta Void Void fasta :: forall which_anQa ty_anQb ty_apOX. Lens (Fasta which_anQa ty_anQb) (Fasta which_anQa ty_apOX) (BioSequence ty_anQb) (BioSequence ty_apOX) header :: forall which_anQa ty_anQb which_apOY. Lens (Fasta which_anQa ty_anQb) (Fasta which_apOY ty_anQb) (SequenceIdentifier which_anQa) (SequenceIdentifier which_apOY) -- | Render a Fasta entry to a ByteString. Will end with a -- final \n in any case. 
fastaToByteString :: Int -> Fasta which ty -> ByteString -- | Render a Fasta entry to a Builder. Will end with a -- final \n in any case. fastaToBuilder :: Int -> Fasta which ty -> Builder -- | Try to parse a ByteString as a Fasta, failing with -- Left, succeeding with Right. byteStringToFasta :: ByteString -> Either String (Fasta which ty) -- | Try to parse a ByteString as multiple Fasta entries. -- Even though this is using the underlying streaming interface, this is -- not streaming. byteStringToMultiFasta :: ByteString -> [Fasta which ty] -- | A lens that goes from a BioSequenceWindow to a Fasta. windowedFasta :: Lens' (BioSequenceWindow w ty k) (Fasta w ty) -- | A prism from a ByteString to a Fasta. Note that this -- will only be an identity if the underlying fasta file is rendered with -- k characters per line. rawFasta :: Int -> Prism' ByteString (Fasta which ty) convertString :: ConvertibleStrings a b => a -> b instance forall k1 (which :: k1) k2 (ty :: k2). GHC.Generics.Generic (Biobase.Fasta.Strict.Fasta which ty) instance forall k1 (which :: k1) k2 (ty :: k2). GHC.Show.Show (Biobase.Fasta.Strict.Fasta which ty) instance forall k1 (which :: k1) k2 (ty :: k2). GHC.Read.Read (Biobase.Fasta.Strict.Fasta which ty) instance forall k1 (which :: k1) k2 (ty :: k2). GHC.Classes.Ord (Biobase.Fasta.Strict.Fasta which ty) instance forall k1 (which :: k1) k2 (ty :: k2). GHC.Classes.Eq (Biobase.Fasta.Strict.Fasta which ty)