{-# LANGUAGE OverloadedStrings #-}
module SequenceFormats.Fasta (readNextFastaEntry, loadFastaChrom) where
import SequenceFormats.Utils (Chrom (..))
import Control.Exception.Base (AssertionFailed (..),
throwIO)
import Control.Monad (void)
import Control.Monad.IO.Class (MonadIO, liftIO)
import Control.Monad.Trans.State.Strict (runStateT)
import qualified Data.Attoparsec.ByteString.Char8 as A
import qualified Data.ByteString.Char8 as B
import Data.Char (isSpace)
import Lens.Family2 (view)
import Pipes (Producer, next, runEffect,
(>->))
import Pipes.Attoparsec (parse)
import qualified Pipes.ByteString as P
import Pipes.Prelude (drain)
import System.IO (Handle, hPutStr, stderr)
loadFastaChrom :: Handle -> Chrom -> IO (Producer B.ByteString IO ())
loadFastaChrom :: Handle -> Chrom -> IO (Producer ByteString IO ())
loadFastaChrom Handle
refFileHandle Chrom
chrom = do
let prod :: Proxy x' x () ByteString IO ()
prod = forall (m :: * -> *).
MonadIO m =>
Handle -> Producer' ByteString m ()
P.fromHandle Handle
refFileHandle
Producer ByteString IO () -> IO (Producer ByteString IO ())
go forall {x'} {x}. Proxy x' x () ByteString IO ()
prod
where
go :: Producer ByteString IO () -> IO (Producer ByteString IO ())
go Producer ByteString IO ()
prod = do
(Chrom
chrom_, Producer ByteString IO (Producer ByteString IO ())
prod') <- forall (m :: * -> *).
MonadIO m =>
Producer ByteString m ()
-> m (Chrom, Producer ByteString m (Producer ByteString m ()))
readNextFastaEntry Producer ByteString IO ()
prod
Handle -> String -> IO ()
hPutStr Handle
stderr (String
"found chromosome " forall a. Semigroup a => a -> a -> a
<> forall a. Show a => a -> String
show Chrom
chrom_)
if Chrom
chrom_ forall a. Eq a => a -> a -> Bool
== Chrom
chrom
then forall (m :: * -> *) a. Monad m => a -> m a
return (forall (f :: * -> *) a. Functor f => f a -> f ()
void Producer ByteString IO (Producer ByteString IO ())
prod')
else do
Producer ByteString IO ()
newProd <- forall (m :: * -> *) r. Monad m => Effect m r -> m r
runEffect forall a b. (a -> b) -> a -> b
$ Producer ByteString IO (Producer ByteString IO ())
prod' forall (m :: * -> *) a' a b r c' c.
Functor m =>
Proxy a' a () b m r -> Proxy () b c' c m r -> Proxy a' a c' c m r
>-> forall (m :: * -> *) a r. Functor m => Consumer' a m r
drain
Producer ByteString IO () -> IO (Producer ByteString IO ())
go Producer ByteString IO ()
newProd
readNextFastaEntry :: (MonadIO m) => Producer B.ByteString m () ->
m (Chrom, Producer B.ByteString m (Producer B.ByteString m ()))
readNextFastaEntry :: forall (m :: * -> *).
MonadIO m =>
Producer ByteString m ()
-> m (Chrom, Producer ByteString m (Producer ByteString m ()))
readNextFastaEntry Producer ByteString m ()
prod = do
(Maybe (Either ParsingError Chrom)
res, Producer ByteString m ()
rest) <- forall s (m :: * -> *) a. StateT s m a -> s -> m (a, s)
runStateT (forall (m :: * -> *) a b.
(Monad m, ParserInput a) =>
Parser a b -> Parser a m (Maybe (Either ParsingError b))
parse Parser Chrom
fastaHeaderLineParser) Producer ByteString m ()
prod
Chrom
header <- case Maybe (Either ParsingError Chrom)
res of
Maybe (Either ParsingError Chrom)
Nothing -> forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall e a. Exception e => e -> IO a
throwIO forall a b. (a -> b) -> a -> b
$ String -> AssertionFailed
AssertionFailed String
"Could not find chromosome. Fasta file exhausted."
Just (Left ParsingError
e_) -> do
Either () (ByteString, Producer ByteString m ())
x <- forall (m :: * -> *) a r.
Monad m =>
Producer a m r -> m (Either r (a, Producer a m r))
next Producer ByteString m ()
rest
case Either () (ByteString, Producer ByteString m ())
x of
(Right (ByteString
chunk, Producer ByteString m ()
_)) -> do
let msg :: String
msg = forall a. Show a => a -> String
show ParsingError
e_ forall a. [a] -> [a] -> [a]
++ ByteString -> String
B.unpack ByteString
chunk
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall e a. Exception e => e -> IO a
throwIO forall a b. (a -> b) -> a -> b
$ String -> AssertionFailed
AssertionFailed (String
"Fasta header parsing error: " forall a. [a] -> [a] -> [a]
++ String
msg)
Either () (ByteString, Producer ByteString m ())
_ -> forall a. HasCallStack => String -> a
error String
"should not happen"
Just (Right Chrom
h) -> forall (m :: * -> *) a. Monad m => a -> m a
return Chrom
h
forall (m :: * -> *) a. Monad m => a -> m a
return (Chrom
header, forall a s t b. FoldLike a s t a b -> s -> a
view (forall (m :: * -> *) x.
Monad m =>
(Word8 -> Bool)
-> Lens'
(Producer ByteString m x)
(Producer ByteString m (Producer ByteString m x))
P.break (forall a. Eq a => a -> a -> Bool
==Word8
62)) Producer ByteString m ()
rest forall (m :: * -> *) a' a b r c' c.
Functor m =>
Proxy a' a () b m r -> Proxy () b c' c m r -> Proxy a' a c' c m r
>-> forall (m :: * -> *) r.
Monad m =>
(Word8 -> Bool) -> Pipe ByteString ByteString m r
P.filter (\Word8
c -> Word8
c forall a. Eq a => a -> a -> Bool
/= Word8
10 Bool -> Bool -> Bool
&& Word8
c forall a. Eq a => a -> a -> Bool
/= Word8
13))
fastaHeaderLineParser :: A.Parser Chrom
= do
Char
_ <- Char -> Parser Char
A.char Char
'>'
ByteString
chrom <- (Char -> Bool) -> Parser ByteString
A.takeWhile forall a b. (a -> b) -> a -> b
$ Bool -> Bool
not forall b c a. (b -> c) -> (a -> b) -> a -> c
. Char -> Bool
isSpace
(Char -> Bool) -> Parser ()
A.skipWhile (\Char
c -> Char
c forall a. Eq a => a -> a -> Bool
/= Char
'\n' Bool -> Bool -> Bool
&& Char
c forall a. Eq a => a -> a -> Bool
/= Char
'\r')
Parser ()
A.endOfLine
forall (m :: * -> *) a. Monad m => a -> m a
return forall b c a. (b -> c) -> (a -> b) -> a -> c
. ByteString -> Chrom
Chrom forall a b. (a -> b) -> a -> b
$ ByteString
chrom