-- Hoogle documentation, generated by Haddock
-- See Hoogle, http://www.haskell.org/hoogle/
-- | streaming FASTA parser
--
-- Stream-based handling of FASTA files. The user selects a window size,
-- the library then handles the window. For each window, the previous
-- (past) window is available, in case some data sits on the boundary
-- between windows.
--
-- Greg Schwartz' http://hackage.haskell.org/package/fasta package
-- is a lot more complete. This one is mostly tailored to my usage
-- requirements (and may at some point use his library).
@package BiobaseFasta
@version 0.4.0.1
-- | Streaming Fasta handling via the streaming library.
--
-- The functions in here should be streaming in constant memory.
--
-- A typical, slightly complicated example is this:
--
-- <pre>
-- forEach :: forall r . Stream (ByteString m) m r -> m (Stream (Of ()) m r)
-- forEach dna = do
--   -- extract the header, but at most 123 characters, dropping the rest
--   hdr SP.:> dta ← extractHeader (Just 123) dna
--   -- create windows ws of a particular type. Include the prefix, the
--   -- suffix, and make each window 10 characters long
--   let ws = (streamedWindows True True (Just 10) (SequenceIdentifier hdr) PlusStrand dta
--               :: SP.Stream (SP.Of (BioSequenceWindow DNA DNA 0)) m r)
--   -- count the number of characters in dna, get the return value, print
--   -- each window
--   count SP.:> r ← SP.mapM_ (liftIO . print) . bswSeqLength $ SP.copy ws
--   liftIO $ print count
--   liftIO $ putStrLn ""
--   -- yield one vacuous () result, return the remainder r from dna.
--   return $ SP.yield () *> return r
-- </pre>
--
-- TODO Check if this is actually true with some unit tests.
module Biobase.Fasta.Streaming
-- | Convert a 'ByteStream' into a stream of streams of 'ByteStream's,
-- keeping the return value @r@ of the input.
--
-- NOTE(review): this has exactly the same type as
-- 'streamOfStreamedFasta'; presumably each inner stream is one Fasta
-- entry -- confirm against the implementation.
streamedFasta :: Monad m => ByteStream m r -> Stream (Stream (ByteStream m) m) m r
-- | Here each individual fasta file will be a stream.
--
-- TODO Once this works, streamingFasta should be S.concats
-- . streamOfStreamedFasta ...
--
-- NOTE(review): no function named @streamingFasta@ appears in this
-- listing; the TODO presumably refers to 'streamedFasta' -- confirm.
streamOfStreamedFasta :: forall m r. Monad m => ByteStream m r -> Stream (Stream (ByteStream m) m) m r
-- | Given a @Stream (ByteStream m) m r@, which is a stream of lines,
-- split off the first Fasta entry.
--
-- The result is again a stream of 'ByteStream's; its return value is
-- another @Stream (ByteStream m) m r@, presumably the part of the input
-- that follows the first entry (TODO confirm).
splitFasta :: Monad m => Stream (ByteStream m) m r -> Stream (ByteStream m) m (Stream (ByteStream m) m r)
-- | Given a stream that looks roughly like @[BS Header, BS Data1,
-- BS Data2, ...]@, create a stream like @[BS Header, BS Data]@.
-- This means that the resulting stream holds exactly two ByteStrings:
-- the header and the collapsed data.
collapseData :: Monad m => Stream (ByteStream m) m r -> Stream (ByteStream m) m r
-- | Rechunk a stream of bytestrings.
--
-- NOTE(review): the 'Int' argument is presumably the size of the new
-- chunks -- confirm against the implementation.
reChunkBS :: Monad m => Int -> Stream (ByteStream m) m r -> Stream (ByteStream m) m r
-- | Assuming a "rechunked" stream of bytestrings, create sequence windows.
--
-- Takes a 'SequenceIdentifier' and a 'Strand' in addition to the
-- stream, and produces a stream of
-- @Location w FwdPosition (BioSequence ty)@ values with the same return
-- value @r@ as the input stream.
chunksToWindows :: Monad m => SequenceIdentifier w -> Strand -> Stream (ByteStream m) m r -> Stream (Of (Location w FwdPosition (BioSequence ty))) m r
-- | Make it possible to take a fasta stream and produce a stream of
-- BioSequenceWindows. This is a convenience function around
-- 'withSuffix . withPrefix . chunksToWindows . reChunks'.
--
-- In case of a Nothing window size, a single huge
-- Fasta entry is produced (and materialized!).
--
-- TODO In case of Nothing window size, we use the
-- collapseData function which has one check too many, and will be
-- slightly slower. However, the check should be once per
-- ByteString.
--
-- NOTE(review): the three @Maybe Int@ arguments are presumably the
-- prefix size, the suffix size and the window size, in that order --
-- confirm. The text above names @reChunks@ and BioSequenceWindows,
-- while this listing exports 'reChunkBS' and the result type is a
-- stream of @PIS w FwdPosition (BioSequence ty)@; the prose may predate
-- a rename.
streamedWindows :: Monad m => Maybe Int -> Maybe Int -> Maybe Int -> SequenceIdentifier w -> Strand -> Stream (ByteStream m) m r -> Stream (Of (PIS w FwdPosition (BioSequence ty))) m r
-- | Get the full length of a stream of BioSequenceWindows,
-- counted in characters in each bswSequence.
--
-- To use, start with bswSeqLength $ SP.copy xs. Then consume
-- this stream normally. It still provides a Stream of
-- BioSequenceWindows. However, the return type is now not just
-- r, but it provides Int SP.:> r, where the
-- Int provides the total length of characters within this
-- Fasta entry.
--
-- This value may then be used to fully update negative strand
-- information.
--
-- NOTE(review): the text refers to @bswSeqLength@ and
-- BioSequenceWindows, whereas this function is named
-- 'streamLocationLength' and consumes a stream of 'Location' values;
-- presumably the prose predates a rename -- confirm.
streamLocationLength :: (Monad m, ModifyLocation posTy seqTy) => Stream (Of (Location i posTy seqTy)) m r -> m (Of Int r)
-- | As a first function, the header should be extracted from a
-- Fasta stream. Since headers may be malformed / malicious, we
-- make it possible to limit how much of the header is kept.
--
-- NOTE(review): the original sentence was cut off after "possible to".
-- Presumably the @Maybe Int@ argument is the maximum number of header
-- characters to keep, with @Nothing@ meaning no limit -- confirm.
--
-- The result pairs the header 'ByteString' with a stream of the same
-- type as the input stream.
extractHeader :: Monad m => Maybe Int -> Stream (ByteStream m) m r -> m (Of ByteString (Stream (ByteStream m) m r))
-- | A convenience module for *small* Fasta entries, that are
-- completely in memory and *not* to be streamed.
--
-- The Data.ByteString.Strict.Lens module is very helpful for
-- further handling of Fasta entries.
--
-- For convenience, the convertString function from
-- string-conversions is supplied.
module Biobase.Fasta.Strict
-- | A *strict* Fasta entry.
--
-- It has a single constructor with two strict fields: the header, a
-- @SequenceIdentifier which@ (@_header@), and the sequence, a
-- @BioSequence ty@ (@_fasta@).
data Fasta which ty
Fasta :: !SequenceIdentifier which -> !BioSequence ty -> Fasta which ty
[_header] :: Fasta which ty -> !SequenceIdentifier which
[_fasta] :: Fasta which ty -> !BioSequence ty
-- | If you don't want to deal with the phantom types: a 'Fasta' whose
-- two type parameters are both fixed to 'Void'.
type FastaUntyped = Fasta Void Void
-- | Lens from a 'Fasta' to its 'BioSequence' (presumably the @_fasta@
-- field -- confirm). The lens is type-changing: the sequence type
-- parameter may change, while the @which@ parameter stays the same.
fasta :: forall k_ad2l (which_acOY :: k_ad2l) k_ad2n (ty_acOZ :: k_ad2n) k_afxU (ty_afxT :: k_afxU). Lens (Fasta (which_acOY :: k_ad2l) (ty_acOZ :: k_ad2n)) (Fasta (which_acOY :: k_ad2l) (ty_afxT :: k_afxU)) (BioSequence ty_acOZ) (BioSequence ty_afxT)
-- | Lens from a 'Fasta' to its 'SequenceIdentifier' (presumably the
-- @_header@ field -- confirm). The lens is type-changing: the @which@
-- parameter may change, while the sequence type parameter stays the
-- same.
header :: forall k_ad2l (which_acOY :: k_ad2l) k_ad2n (ty_acOZ :: k_ad2n) k_afxW (which_afxV :: k_afxW). Lens (Fasta (which_acOY :: k_ad2l) (ty_acOZ :: k_ad2n)) (Fasta (which_afxV :: k_afxW) (ty_acOZ :: k_ad2n)) (SequenceIdentifier which_acOY) (SequenceIdentifier which_afxV)
-- | Render a Fasta entry to a ByteString. Will end with a
-- final newline (@\\n@) in any case.
--
-- NOTE(review): the 'Int' argument is presumably the number of sequence
-- characters per output line -- confirm.
fastaToByteString :: Int -> Fasta which ty -> ByteString
-- | Render a Fasta entry to a Builder. Will end with a
-- final newline (@\\n@) in any case.
--
-- NOTE(review): the 'Int' argument is presumably the number of sequence
-- characters per output line -- confirm.
fastaToBuilder :: Int -> Fasta which ty -> Builder
-- | Try to parse a ByteString as a Fasta, failing with
-- Left (carrying a 'String'), succeeding with Right.
byteStringToFasta :: ByteString -> Either String (Fasta which ty)
-- | Try to parse a ByteString as multiple Fasta entries.
-- Even though this is using the underlying streaming interface, this is
-- not streaming.
--
-- A lens that goes from a BioSequenceWindow to a Fasta.
--
-- A prism from a ByteString to a Fasta. Note that this
-- will only be an identity if the underlying fasta file is rendered with
-- k characters per line.
--
-- NOTE(review): only the last paragraph matches the signature below (a
-- 'Prism'' between 'ByteString' and 'Fasta'). The first two paragraphs
-- look like documentation for other definitions that are not in this
-- listing and were merged in here -- confirm against the source module.
-- Presumably @k@ is the 'Int' argument.
rawFasta :: Int -> Prism' ByteString (Fasta which ty)
-- | Convert between string types that have a 'ConvertibleStrings'
-- instance. Supplied here for convenience; per the module description
-- this is the function from the string-conversions package.
convertString :: ConvertibleStrings a b => a -> b
-- | Instances for 'Fasta': 'Generic', 'Show', 'Read', 'Ord' and 'Eq'.
-- Each is poly-kinded in both type parameters and places no constraints
-- on @which@ or @ty@.
instance forall k1 (which :: k1) k2 (ty :: k2). GHC.Generics.Generic (Biobase.Fasta.Strict.Fasta which ty)
instance forall k1 (which :: k1) k2 (ty :: k2). GHC.Show.Show (Biobase.Fasta.Strict.Fasta which ty)
instance forall k1 (which :: k1) k2 (ty :: k2). GHC.Read.Read (Biobase.Fasta.Strict.Fasta which ty)
instance forall k1 (which :: k1) k2 (ty :: k2). GHC.Classes.Ord (Biobase.Fasta.Strict.Fasta which ty)
instance forall k1 (which :: k1) k2 (ty :: k2). GHC.Classes.Eq (Biobase.Fasta.Strict.Fasta which ty)