{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE UndecidableInstances #-}
module Bio.Data.Fasta
( FastaLike(..)
, fastaReader
) where
import Bio.Motif
import Bio.Seq
import qualified Data.ByteString.Char8 as B
import Conduit
-- | Types that can be built from a single FASTA record.
--
-- A record is a pair of the header text and the list of lines that follow
-- it. Only 'fromFastaRecord' has to be supplied; the file-reading helpers
-- are derived from it.
class FastaLike f where
    {-# MINIMAL fromFastaRecord #-}

    -- | Convert one raw record, given as @(header, body lines)@, into a value.
    fromFastaRecord :: (B.ByteString, [B.ByteString]) -> f

    -- | Stream the records of a file, converting each one with
    -- 'fromFastaRecord' as it is produced by 'fastaReader'.
    readFasta :: FilePath -> ConduitT i f (ResourceT IO) ()
    readFasta path = fastaReader path .| mapC fromFastaRecord

    -- | Like 'readFasta', but runs the stream to completion and collects
    -- every converted record into a list.
    readFasta' :: FilePath -> IO [f]
    readFasta' path = runResourceT . runConduit $ readFasta path .| sinkList
-- | Any 'BioSeq' sequence can be read from a FASTA record: the body lines
-- are concatenated and parsed with 'fromBS'. The header is not stored in
-- the result; it is only used to label parse failures.
--
-- Calls 'error' when 'fromBS' rejects the concatenated body. The message
-- names the offending record so that a bad entry can be located in the
-- input file.
instance BioSeq s a => FastaLike (s a) where
    fromFastaRecord (name, xs) = case fromBS (B.concat xs) of
        Left err -> error $
            "fromFastaRecord: cannot parse record '" ++ B.unpack name
            ++ "': " ++ err
        Right x  -> x
    {-# INLINE fromFastaRecord #-}
-- | A motif is read from a FASTA-style record: the header text becomes the
-- motif name and the remaining lines are handed to 'toPWM' to build the
-- matrix.
instance FastaLike Motif where
    fromFastaRecord (header, rows) = Motif header $ toPWM rows
    {-# INLINE fromFastaRecord #-}
-- | Stream the raw records of a FASTA-formatted file.
--
-- Each record is yielded as @(header, body lines)@, where the header is the
-- text following the leading @\'>\'@ and the body lines are the non-empty
-- lines up to the next header (or the end of the file), in file order.
--
-- Empty lines are skipped. A single trailing carriage return is removed
-- from every line, so files with CRLF line endings produce the same records
-- as files with LF line endings.
--
-- Non-empty lines that appear before the first header are grouped into a
-- record whose first line takes the place of the header.
fastaReader :: FilePath
            -> ConduitT i (B.ByteString, [B.ByteString]) (ResourceT IO) ()
fastaReader fl = sourceFile fl .| linesUnboundedAsciiC .| loop []
  where
    -- @acc@ holds the lines of the record being assembled, newest first;
    -- its last element is the header.
    loop acc = do
        next <- await
        case next of
            Nothing  -> output (reverse acc)
            Just raw ->
                let l = stripCR raw
                in case B.uncons l of
                    Nothing          -> loop acc
                    Just ('>', name) -> output (reverse acc) >> loop [name]
                    Just _           -> loop (l : acc)

    -- The line splitter only breaks on '\n', so a CRLF-terminated line
    -- still carries its '\r' when it arrives here.
    stripCR l = case B.unsnoc l of
        Just (rest, '\r') -> rest
        _                 -> l

    -- Emit the finished record; nothing has been collected before the
    -- first line of the file, in which case there is nothing to emit.
    output (x:xs) = yield (x, xs)
    output _      = return ()
{-# INLINE fastaReader #-}