{-# LANGUAGE FlexibleInstances #-} {-# LANGUAGE UndecidableInstances #-} module Bio.Data.Fasta ( FastaLike(..) , fastaReader ) where import Bio.Motif import Bio.Seq import qualified Data.ByteString.Char8 as B import Conduit class FastaLike f where -- | Convert a FASTA record, consisting of a record header and a record body, -- to a specific data type fromFastaRecord :: (B.ByteString, [B.ByteString]) -> f readFasta :: FilePath -> ConduitT i f (ResourceT IO) () readFasta fl = fastaReader fl .| mapC fromFastaRecord -- | non-stream version, read whole file in memory readFasta' :: FilePath -> IO [f] readFasta' fl = runResourceT $ runConduit $ readFasta fl .| sinkList {-# MINIMAL fromFastaRecord #-} instance BioSeq s a => FastaLike (s a) where fromFastaRecord (_, xs) = case fromBS (B.concat xs) of Left err -> error err Right x -> x {-# INLINE fromFastaRecord #-} instance FastaLike Motif where fromFastaRecord (name, mat) = Motif name (toPWM mat) {-# INLINE fromFastaRecord #-} fastaReader :: FilePath -> ConduitT i (B.ByteString, [B.ByteString]) (ResourceT IO) () fastaReader fl = sourceFile fl .| linesUnboundedAsciiC .| loop [] where loop acc = do x <- await case x of Just l -> case () of _ | B.null l -> loop acc -- empty line, go to next line | B.head l == '>' -> output (reverse acc) >> loop [B.tail l] | otherwise -> loop (l:acc) Nothing -> output $ reverse acc output (x:xs) = yield (x, xs) output _ = return () {-# INLINE fastaReader #-}