{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE DeriveGeneric, DeriveAnyClass #-}
module Bio.Data.Fastq
( Fastq(..)
, streamFastqGzip
, streamFastq
, sinkFastqGzip
, sinkFastq
, parseFastqC
, parseFastqC'
, fastqToByteString
, qualitySummary
, trimPolyA
) where
import Conduit
import Data.Conduit.Zlib (ungzip, multiple, gzip)
import qualified Data.ByteString.Char8 as B
import qualified Data.ByteString as BS
import qualified Data.Attoparsec.ByteString as A
import Data.Attoparsec.ByteString.Char8
import Data.Conduit.Attoparsec
import GHC.Generics (Generic)
import Control.DeepSeq
data Fastq = Fastq
{ Fastq -> ByteString
fastqSeqId :: B.ByteString
, Fastq -> ByteString
fastqSeq :: B.ByteString
, Fastq -> ByteString
fastqSeqQual :: B.ByteString
} deriving (Int -> Fastq -> ShowS
[Fastq] -> ShowS
Fastq -> String
(Int -> Fastq -> ShowS)
-> (Fastq -> String) -> ([Fastq] -> ShowS) -> Show Fastq
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [Fastq] -> ShowS
$cshowList :: [Fastq] -> ShowS
show :: Fastq -> String
$cshow :: Fastq -> String
showsPrec :: Int -> Fastq -> ShowS
$cshowsPrec :: Int -> Fastq -> ShowS
Show, Fastq -> Fastq -> Bool
(Fastq -> Fastq -> Bool) -> (Fastq -> Fastq -> Bool) -> Eq Fastq
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: Fastq -> Fastq -> Bool
$c/= :: Fastq -> Fastq -> Bool
== :: Fastq -> Fastq -> Bool
$c== :: Fastq -> Fastq -> Bool
Eq, (forall x. Fastq -> Rep Fastq x)
-> (forall x. Rep Fastq x -> Fastq) -> Generic Fastq
forall x. Rep Fastq x -> Fastq
forall x. Fastq -> Rep Fastq x
forall a.
(forall x. a -> Rep a x) -> (forall x. Rep a x -> a) -> Generic a
$cto :: forall x. Rep Fastq x -> Fastq
$cfrom :: forall x. Fastq -> Rep Fastq x
Generic, Fastq -> ()
(Fastq -> ()) -> NFData Fastq
forall a. (a -> ()) -> NFData a
rnf :: Fastq -> ()
$crnf :: Fastq -> ()
NFData)
streamFastqGzip :: (PrimMonad m, MonadThrow m, MonadResource m)
=> FilePath -> ConduitT i Fastq m ()
streamFastqGzip :: String -> ConduitT i Fastq m ()
streamFastqGzip String
fl = String -> ConduitT i ByteString m ()
forall (m :: * -> *) i.
MonadResource m =>
String -> ConduitT i ByteString m ()
sourceFileBS String
fl ConduitT i ByteString m ()
-> ConduitM ByteString Fastq m () -> ConduitT i Fastq m ()
forall (m :: * -> *) a b c r.
Monad m =>
ConduitM a b m () -> ConduitM b c m r -> ConduitM a c m r
.| ConduitT ByteString ByteString m ()
-> ConduitT ByteString ByteString m ()
forall (m :: * -> *) a.
Monad m =>
ConduitT ByteString a m () -> ConduitT ByteString a m ()
multiple ConduitT ByteString ByteString m ()
forall (m :: * -> *).
(PrimMonad m, MonadThrow m) =>
ConduitT ByteString ByteString m ()
ungzip ConduitT ByteString ByteString m ()
-> ConduitM ByteString Fastq m () -> ConduitM ByteString Fastq m ()
forall (m :: * -> *) a b c r.
Monad m =>
ConduitM a b m () -> ConduitM b c m r -> ConduitM a c m r
.| ConduitM ByteString Fastq m ()
forall (m :: * -> *). Monad m => ConduitT ByteString Fastq m ()
parseFastqC
streamFastq :: (MonadResource m, MonadThrow m)
=> FilePath -> ConduitT i Fastq m ()
streamFastq :: String -> ConduitT i Fastq m ()
streamFastq String
fl = String -> ConduitT i ByteString m ()
forall (m :: * -> *) i.
MonadResource m =>
String -> ConduitT i ByteString m ()
sourceFileBS String
fl ConduitT i ByteString m ()
-> ConduitM ByteString Fastq m () -> ConduitT i Fastq m ()
forall (m :: * -> *) a b c r.
Monad m =>
ConduitM a b m () -> ConduitM b c m r -> ConduitM a c m r
.| ConduitM ByteString Fastq m ()
forall (m :: * -> *). Monad m => ConduitT ByteString Fastq m ()
parseFastqC
sinkFastq :: (MonadResource m, MonadThrow m)
=> FilePath -> ConduitT Fastq o m ()
sinkFastq :: String -> ConduitT Fastq o m ()
sinkFastq String
fl = (Fastq -> ByteString) -> ConduitT Fastq ByteString m ()
forall (m :: * -> *) a b. Monad m => (a -> b) -> ConduitT a b m ()
mapC Fastq -> ByteString
fastqToByteString ConduitT Fastq ByteString m ()
-> ConduitM ByteString o m () -> ConduitT Fastq o m ()
forall (m :: * -> *) a b c r.
Monad m =>
ConduitM a b m () -> ConduitM b c m r -> ConduitM a c m r
.| ConduitT ByteString ByteString m ()
forall (m :: * -> *) seq.
(Monad m, IsSequence seq, Element seq ~ Word8) =>
ConduitT seq seq m ()
unlinesAsciiC ConduitT ByteString ByteString m ()
-> ConduitM ByteString o m () -> ConduitM ByteString o m ()
forall (m :: * -> *) a b c r.
Monad m =>
ConduitM a b m () -> ConduitM b c m r -> ConduitM a c m r
.| String -> ConduitM ByteString o m ()
forall (m :: * -> *) o.
MonadResource m =>
String -> ConduitT ByteString o m ()
sinkFileBS String
fl
sinkFastqGzip :: (PrimMonad m, MonadThrow m, MonadResource m)
=> FilePath -> ConduitT Fastq o m ()
sinkFastqGzip :: String -> ConduitT Fastq o m ()
sinkFastqGzip String
fl = (Fastq -> ByteString) -> ConduitT Fastq ByteString m ()
forall (m :: * -> *) a b. Monad m => (a -> b) -> ConduitT a b m ()
mapC Fastq -> ByteString
fastqToByteString ConduitT Fastq ByteString m ()
-> ConduitM ByteString o m () -> ConduitT Fastq o m ()
forall (m :: * -> *) a b c r.
Monad m =>
ConduitM a b m () -> ConduitM b c m r -> ConduitM a c m r
.| ConduitT ByteString ByteString m ()
forall (m :: * -> *) seq.
(Monad m, IsSequence seq, Element seq ~ Word8) =>
ConduitT seq seq m ()
unlinesAsciiC ConduitT ByteString ByteString m ()
-> ConduitM ByteString o m () -> ConduitM ByteString o m ()
forall (m :: * -> *) a b c r.
Monad m =>
ConduitM a b m () -> ConduitM b c m r -> ConduitM a c m r
.| ConduitT ByteString ByteString m ()
forall (m :: * -> *).
(MonadThrow m, PrimMonad m) =>
ConduitT ByteString ByteString m ()
gzip ConduitT ByteString ByteString m ()
-> ConduitM ByteString o m () -> ConduitM ByteString o m ()
forall (m :: * -> *) a b c r.
Monad m =>
ConduitM a b m () -> ConduitM b c m r -> ConduitM a c m r
.| String -> ConduitM ByteString o m ()
forall (m :: * -> *) o.
MonadResource m =>
String -> ConduitT ByteString o m ()
sinkFileBS String
fl
data FQBuilder = Init FQBuilder
| FQ1 B.ByteString FQBuilder
| FQ2 B.ByteString FQBuilder
| FQ3 B.ByteString FQBuilder
| Complete
fqBuilder :: FQBuilder -> Fastq
fqBuilder :: FQBuilder -> Fastq
fqBuilder = ([ByteString], [ByteString], [ByteString]) -> FQBuilder -> Fastq
go ([], [], [])
where
go :: ([ByteString], [ByteString], [ByteString]) -> FQBuilder -> Fastq
go ([ByteString], [ByteString], [ByteString])
acc (Init FQBuilder
bldr) = ([ByteString], [ByteString], [ByteString]) -> FQBuilder -> Fastq
go ([ByteString], [ByteString], [ByteString])
acc FQBuilder
bldr
go ([ByteString]
f1,[ByteString]
f2,[ByteString]
f3) (FQ1 ByteString
x FQBuilder
bldr) = ([ByteString], [ByteString], [ByteString]) -> FQBuilder -> Fastq
go (ByteString
xByteString -> [ByteString] -> [ByteString]
forall a. a -> [a] -> [a]
:[ByteString]
f1, [ByteString]
f2, [ByteString]
f3) FQBuilder
bldr
go ([ByteString]
f1,[ByteString]
f2,[ByteString]
f3) (FQ2 ByteString
x FQBuilder
bldr) = ([ByteString], [ByteString], [ByteString]) -> FQBuilder -> Fastq
go ([ByteString]
f1, ByteString
xByteString -> [ByteString] -> [ByteString]
forall a. a -> [a] -> [a]
:[ByteString]
f2, [ByteString]
f3) FQBuilder
bldr
go ([ByteString]
f1,[ByteString]
f2,[ByteString]
f3) (FQ3 ByteString
x FQBuilder
bldr) = ([ByteString], [ByteString], [ByteString]) -> FQBuilder -> Fastq
go ([ByteString]
f1, [ByteString]
f2, ByteString
xByteString -> [ByteString] -> [ByteString]
forall a. a -> [a] -> [a]
:[ByteString]
f3) FQBuilder
bldr
go ([ByteString]
f1,[ByteString]
f2,[ByteString]
f3) FQBuilder
Complete = ByteString -> ByteString -> ByteString -> Fastq
Fastq ([ByteString] -> ByteString
B.concat ([ByteString] -> ByteString) -> [ByteString] -> ByteString
forall a b. (a -> b) -> a -> b
$ [ByteString] -> [ByteString]
forall a. [a] -> [a]
reverse [ByteString]
f1)
([ByteString] -> ByteString
B.concat ([ByteString] -> ByteString) -> [ByteString] -> ByteString
forall a b. (a -> b) -> a -> b
$ [ByteString] -> [ByteString]
forall a. [a] -> [a]
reverse [ByteString]
f2) ([ByteString] -> ByteString
B.concat ([ByteString] -> ByteString) -> [ByteString] -> ByteString
forall a b. (a -> b) -> a -> b
$ [ByteString] -> [ByteString]
forall a. [a] -> [a]
reverse [ByteString]
f3)
{-# INLINE fqBuilder #-}
parseFastqC :: Monad m => ConduitT B.ByteString Fastq m ()
parseFastqC :: ConduitT ByteString Fastq m ()
parseFastqC = ConduitT ByteString Fastq m (Maybe ByteString)
forall (m :: * -> *) i. Monad m => Consumer i m (Maybe i)
await ConduitT ByteString Fastq m (Maybe ByteString)
-> (Maybe ByteString -> ConduitT ByteString Fastq m ())
-> ConduitT ByteString Fastq m ()
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= ConduitT ByteString Fastq m ()
-> (ByteString -> ConduitT ByteString Fastq m ())
-> Maybe ByteString
-> ConduitT ByteString Fastq m ()
forall b a. b -> (a -> b) -> Maybe a -> b
maybe (String -> ConduitT ByteString Fastq m ()
forall a. HasCallStack => String -> a
error String
"Empty input") ( \ByteString
x -> do
if ByteString -> Char
B.head ByteString
x Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'@'
then (FQBuilder -> FQBuilder)
-> Char -> ByteString -> ConduitT ByteString Fastq m ()
forall (m :: * -> *).
Monad m =>
(FQBuilder -> FQBuilder)
-> Char -> ByteString -> ConduitT ByteString Fastq m ()
loop FQBuilder -> FQBuilder
Init Char
'a' (ByteString -> ConduitT ByteString Fastq m ())
-> ByteString -> ConduitT ByteString Fastq m ()
forall a b. (a -> b) -> a -> b
$ ByteString -> ByteString
B.tail ByteString
x
else String -> ConduitT ByteString Fastq m ()
forall a. HasCallStack => String -> a
error String
"Record does not start with \'@\'" )
where
tryRead1 :: ByteString -> ConduitT ByteString o m ByteString
tryRead1 ByteString
input | ByteString -> Bool
B.null ByteString
input = ConduitT ByteString o m (Maybe ByteString)
forall (m :: * -> *) i. Monad m => Consumer i m (Maybe i)
await ConduitT ByteString o m (Maybe ByteString)
-> (Maybe ByteString -> ConduitT ByteString o m ByteString)
-> ConduitT ByteString o m ByteString
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= ConduitT ByteString o m ByteString
-> (ByteString -> ConduitT ByteString o m ByteString)
-> Maybe ByteString
-> ConduitT ByteString o m ByteString
forall b a. b -> (a -> b) -> Maybe a -> b
maybe (String -> ConduitT ByteString o m ByteString
forall a. HasCallStack => String -> a
error String
"Unexpected EOF") ByteString -> ConduitT ByteString o m ByteString
forall (m :: * -> *) a. Monad m => a -> m a
return
| Bool
otherwise = ByteString -> ConduitT ByteString o m ByteString
forall (m :: * -> *) a. Monad m => a -> m a
return ByteString
input
loop :: (FQBuilder -> FQBuilder)
-> Char -> ByteString -> ConduitT ByteString Fastq m ()
loop FQBuilder -> FQBuilder
acc Char
st ByteString
input = case Char
st of
Char
'a' -> do
(ByteString
x, ByteString
rest) <- (Char -> Bool) -> ByteString -> (ByteString, ByteString)
B.break (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
==Char
'\n') (ByteString -> (ByteString, ByteString))
-> ConduitT ByteString Fastq m ByteString
-> ConduitT ByteString Fastq m (ByteString, ByteString)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> ByteString -> ConduitT ByteString Fastq m ByteString
forall (m :: * -> *) o.
Monad m =>
ByteString -> ConduitT ByteString o m ByteString
tryRead1 ByteString
input
if ByteString -> Bool
B.null ByteString
rest
then (FQBuilder -> FQBuilder)
-> Char -> ByteString -> ConduitT ByteString Fastq m ()
loop (FQBuilder -> FQBuilder
acc (FQBuilder -> FQBuilder)
-> (FQBuilder -> FQBuilder) -> FQBuilder -> FQBuilder
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ByteString -> FQBuilder -> FQBuilder
FQ1 ByteString
x) Char
'a' ByteString
rest
else (FQBuilder -> FQBuilder)
-> Char -> ByteString -> ConduitT ByteString Fastq m ()
loop (FQBuilder -> FQBuilder
acc (FQBuilder -> FQBuilder)
-> (FQBuilder -> FQBuilder) -> FQBuilder -> FQBuilder
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ByteString -> FQBuilder -> FQBuilder
FQ1 ByteString
x) Char
'b' (ByteString -> ConduitT ByteString Fastq m ())
-> ByteString -> ConduitT ByteString Fastq m ()
forall a b. (a -> b) -> a -> b
$ ByteString -> ByteString
B.tail ByteString
rest
Char
'b' -> do
(ByteString
x, ByteString
rest) <- (Char -> Bool) -> ByteString -> (ByteString, ByteString)
B.break (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
==Char
'\n') (ByteString -> (ByteString, ByteString))
-> ConduitT ByteString Fastq m ByteString
-> ConduitT ByteString Fastq m (ByteString, ByteString)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> ByteString -> ConduitT ByteString Fastq m ByteString
forall (m :: * -> *) o.
Monad m =>
ByteString -> ConduitT ByteString o m ByteString
tryRead1 ByteString
input
if ByteString -> Bool
B.null ByteString
rest
then (FQBuilder -> FQBuilder)
-> Char -> ByteString -> ConduitT ByteString Fastq m ()
loop (FQBuilder -> FQBuilder
acc (FQBuilder -> FQBuilder)
-> (FQBuilder -> FQBuilder) -> FQBuilder -> FQBuilder
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ByteString -> FQBuilder -> FQBuilder
FQ2 ByteString
x) Char
'b' ByteString
rest
else (FQBuilder -> FQBuilder)
-> Char -> ByteString -> ConduitT ByteString Fastq m ()
loop (FQBuilder -> FQBuilder
acc (FQBuilder -> FQBuilder)
-> (FQBuilder -> FQBuilder) -> FQBuilder -> FQBuilder
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ByteString -> FQBuilder -> FQBuilder
FQ2 ByteString
x) Char
'B' (ByteString -> ConduitT ByteString Fastq m ())
-> ByteString -> ConduitT ByteString Fastq m ()
forall a b. (a -> b) -> a -> b
$ ByteString -> ByteString
B.tail ByteString
rest
Char
'B' -> do
ByteString
input' <- ByteString -> ConduitT ByteString Fastq m ByteString
forall (m :: * -> *) o.
Monad m =>
ByteString -> ConduitT ByteString o m ByteString
tryRead1 ByteString
input
if ByteString -> Char
B.head ByteString
input' Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'+'
then (FQBuilder -> FQBuilder)
-> Char -> ByteString -> ConduitT ByteString Fastq m ()
loop FQBuilder -> FQBuilder
acc Char
'c' (ByteString -> ConduitT ByteString Fastq m ())
-> ByteString -> ConduitT ByteString Fastq m ()
forall a b. (a -> b) -> a -> b
$ ByteString -> ByteString
B.tail ByteString
input'
else do
let (ByteString
x, ByteString
rest) = (Char -> Bool) -> ByteString -> (ByteString, ByteString)
B.break (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
==Char
'\n') ByteString
input'
if ByteString -> Bool
B.null ByteString
rest
then (FQBuilder -> FQBuilder)
-> Char -> ByteString -> ConduitT ByteString Fastq m ()
loop (FQBuilder -> FQBuilder
acc (FQBuilder -> FQBuilder)
-> (FQBuilder -> FQBuilder) -> FQBuilder -> FQBuilder
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ByteString -> FQBuilder -> FQBuilder
FQ2 ByteString
x) Char
'b' ByteString
rest
else (FQBuilder -> FQBuilder)
-> Char -> ByteString -> ConduitT ByteString Fastq m ()
loop (FQBuilder -> FQBuilder
acc (FQBuilder -> FQBuilder)
-> (FQBuilder -> FQBuilder) -> FQBuilder -> FQBuilder
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ByteString -> FQBuilder -> FQBuilder
FQ2 ByteString
x) Char
'B' (ByteString -> ConduitT ByteString Fastq m ())
-> ByteString -> ConduitT ByteString Fastq m ()
forall a b. (a -> b) -> a -> b
$ ByteString -> ByteString
B.tail ByteString
rest
Char
'c' -> do
(ByteString
x, ByteString
rest) <- (Char -> Bool) -> ByteString -> (ByteString, ByteString)
B.break (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
==Char
'\n') (ByteString -> (ByteString, ByteString))
-> ConduitT ByteString Fastq m ByteString
-> ConduitT ByteString Fastq m (ByteString, ByteString)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> ByteString -> ConduitT ByteString Fastq m ByteString
forall (m :: * -> *) o.
Monad m =>
ByteString -> ConduitT ByteString o m ByteString
tryRead1 ByteString
input
if ByteString -> Bool
B.null ByteString
rest
then (FQBuilder -> FQBuilder)
-> Char -> ByteString -> ConduitT ByteString Fastq m ()
loop FQBuilder -> FQBuilder
acc Char
'c' ByteString
rest
else (FQBuilder -> FQBuilder)
-> Char -> ByteString -> ConduitT ByteString Fastq m ()
loop FQBuilder -> FQBuilder
acc Char
'd' (ByteString -> ConduitT ByteString Fastq m ())
-> ByteString -> ConduitT ByteString Fastq m ()
forall a b. (a -> b) -> a -> b
$ ByteString -> ByteString
B.tail ByteString
rest
Char
'd' -> do
(ByteString
x, ByteString
rest) <- (Char -> Bool) -> ByteString -> (ByteString, ByteString)
B.break (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
==Char
'\n') (ByteString -> (ByteString, ByteString))
-> ConduitT ByteString Fastq m ByteString
-> ConduitT ByteString Fastq m (ByteString, ByteString)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> ByteString -> ConduitT ByteString Fastq m ByteString
forall (m :: * -> *) o.
Monad m =>
ByteString -> ConduitT ByteString o m ByteString
tryRead1 ByteString
input
if ByteString -> Bool
B.null ByteString
rest
then (FQBuilder -> FQBuilder)
-> Char -> ByteString -> ConduitT ByteString Fastq m ()
loop (FQBuilder -> FQBuilder
acc (FQBuilder -> FQBuilder)
-> (FQBuilder -> FQBuilder) -> FQBuilder -> FQBuilder
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ByteString -> FQBuilder -> FQBuilder
FQ3 ByteString
x) Char
'd' ByteString
rest
else (FQBuilder -> FQBuilder)
-> Char -> ByteString -> ConduitT ByteString Fastq m ()
loop (FQBuilder -> FQBuilder
acc (FQBuilder -> FQBuilder)
-> (FQBuilder -> FQBuilder) -> FQBuilder -> FQBuilder
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ByteString -> FQBuilder -> FQBuilder
FQ3 ByteString
x) Char
'D' (ByteString -> ConduitT ByteString Fastq m ())
-> ByteString -> ConduitT ByteString Fastq m ()
forall a b. (a -> b) -> a -> b
$ ByteString -> ByteString
B.tail ByteString
rest
Char
'D' -> if ByteString -> Bool
B.null ByteString
input
then ConduitT ByteString Fastq m (Maybe ByteString)
forall (m :: * -> *) i. Monad m => Consumer i m (Maybe i)
await ConduitT ByteString Fastq m (Maybe ByteString)
-> (Maybe ByteString -> ConduitT ByteString Fastq m ())
-> ConduitT ByteString Fastq m ()
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= \case
Maybe ByteString
Nothing -> Fastq -> ConduitT ByteString Fastq m ()
forall (m :: * -> *) o i. Monad m => o -> ConduitT i o m ()
yield (Fastq -> ConduitT ByteString Fastq m ())
-> Fastq -> ConduitT ByteString Fastq m ()
forall a b. (a -> b) -> a -> b
$ FQBuilder -> Fastq
fqBuilder (FQBuilder -> Fastq) -> FQBuilder -> Fastq
forall a b. (a -> b) -> a -> b
$ FQBuilder -> FQBuilder
acc FQBuilder
Complete
Just ByteString
input' -> if ByteString -> Char
B.head ByteString
input' Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'@'
then do
Fastq -> ConduitT ByteString Fastq m ()
forall (m :: * -> *) o i. Monad m => o -> ConduitT i o m ()
yield (Fastq -> ConduitT ByteString Fastq m ())
-> Fastq -> ConduitT ByteString Fastq m ()
forall a b. (a -> b) -> a -> b
$ FQBuilder -> Fastq
fqBuilder (FQBuilder -> Fastq) -> FQBuilder -> Fastq
forall a b. (a -> b) -> a -> b
$ FQBuilder -> FQBuilder
acc FQBuilder
Complete
(FQBuilder -> FQBuilder)
-> Char -> ByteString -> ConduitT ByteString Fastq m ()
loop FQBuilder -> FQBuilder
Init Char
'a' (ByteString -> ConduitT ByteString Fastq m ())
-> ByteString -> ConduitT ByteString Fastq m ()
forall a b. (a -> b) -> a -> b
$ ByteString -> ByteString
B.tail ByteString
input'
else (FQBuilder -> FQBuilder)
-> Char -> ByteString -> ConduitT ByteString Fastq m ()
loop FQBuilder -> FQBuilder
acc Char
'd' ByteString
input'
else if ByteString -> Char
B.head ByteString
input Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'@'
then do
Fastq -> ConduitT ByteString Fastq m ()
forall (m :: * -> *) o i. Monad m => o -> ConduitT i o m ()
yield (Fastq -> ConduitT ByteString Fastq m ())
-> Fastq -> ConduitT ByteString Fastq m ()
forall a b. (a -> b) -> a -> b
$ FQBuilder -> Fastq
fqBuilder (FQBuilder -> Fastq) -> FQBuilder -> Fastq
forall a b. (a -> b) -> a -> b
$ FQBuilder -> FQBuilder
acc FQBuilder
Complete
(FQBuilder -> FQBuilder)
-> Char -> ByteString -> ConduitT ByteString Fastq m ()
loop FQBuilder -> FQBuilder
Init Char
'a' (ByteString -> ConduitT ByteString Fastq m ())
-> ByteString -> ConduitT ByteString Fastq m ()
forall a b. (a -> b) -> a -> b
$ ByteString -> ByteString
B.tail ByteString
input
else (FQBuilder -> FQBuilder)
-> Char -> ByteString -> ConduitT ByteString Fastq m ()
loop FQBuilder -> FQBuilder
acc Char
'd' ByteString
input
{-# INLINE parseFastqC #-}
parseFastqC' :: MonadThrow m => ConduitT B.ByteString Fastq m ()
parseFastqC' :: ConduitT ByteString Fastq m ()
parseFastqC' = Parser ByteString Fastq
-> ConduitT ByteString (PositionRange, Fastq) m ()
forall a (m :: * -> *) b.
(AttoparsecInput a, MonadThrow m) =>
Parser a b -> ConduitT a (PositionRange, b) m ()
conduitParser Parser ByteString Fastq
fastqParser ConduitT ByteString (PositionRange, Fastq) m ()
-> ConduitM (PositionRange, Fastq) Fastq m ()
-> ConduitT ByteString Fastq m ()
forall (m :: * -> *) a b c r.
Monad m =>
ConduitM a b m () -> ConduitM b c m r -> ConduitM a c m r
.| ((PositionRange, Fastq) -> Fastq)
-> ConduitM (PositionRange, Fastq) Fastq m ()
forall (m :: * -> *) a b. Monad m => (a -> b) -> ConduitT a b m ()
mapC (PositionRange, Fastq) -> Fastq
forall a b. (a, b) -> b
snd
where
fastqParser :: Parser ByteString Fastq
fastqParser = do
Word8
_ <- (Char -> Bool) -> Parser ()
skipWhile (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
/=Char
'@') Parser () -> Parser ByteString Word8 -> Parser ByteString Word8
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Char -> Parser ByteString Word8
char8 Char
'@'
ByteString
ident <- (Word8 -> Bool) -> Parser ByteString
A.takeTill Word8 -> Bool
isEndOfLine
Parser ()
endOfLine
ByteString
sequ <- (Word8 -> Bool) -> ByteString -> ByteString
BS.filter (Bool -> Bool
not (Bool -> Bool) -> (Word8 -> Bool) -> Word8 -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Word8 -> Bool
isEndOfLine) (ByteString -> ByteString)
-> Parser ByteString -> Parser ByteString
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> (Char -> Bool) -> Parser ByteString
takeTill (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
==Char
'+')
Char -> Parser ByteString Word8
char8 Char
'+' Parser ByteString Word8 -> Parser () -> Parser ()
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> (Word8 -> Bool) -> Parser ()
A.skipWhile (Bool -> Bool
not (Bool -> Bool) -> (Word8 -> Bool) -> Word8 -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Word8 -> Bool
isEndOfLine) Parser () -> Parser () -> Parser ()
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Parser ()
endOfLine
ByteString
score <- (Word8 -> Bool) -> ByteString -> ByteString
BS.filter (Bool -> Bool
not (Bool -> Bool) -> (Word8 -> Bool) -> Word8 -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Word8 -> Bool
isEndOfLine) (ByteString -> ByteString)
-> Parser ByteString -> Parser ByteString
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$>
Int -> (Int -> Word8 -> Maybe Int) -> Parser ByteString
forall s. s -> (s -> Word8 -> Maybe s) -> Parser ByteString
A.scan Int
0 (Int -> Int -> Word8 -> Maybe Int
forall a. (Ord a, Num a) => a -> a -> Word8 -> Maybe a
f (ByteString -> Int
B.length ByteString
sequ))
(Char -> Bool) -> Parser ()
skipWhile (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
/=Char
'@')
Fastq -> Parser ByteString Fastq
forall (m :: * -> *) a. Monad m => a -> m a
return (Fastq -> Parser ByteString Fastq)
-> Fastq -> Parser ByteString Fastq
forall a b. (a -> b) -> a -> b
$ ByteString -> ByteString -> ByteString -> Fastq
Fastq ByteString
ident ByteString
sequ ByteString
score
where
f :: a -> a -> Word8 -> Maybe a
f a
n a
i Word8
x | a
i a -> a -> Bool
forall a. Ord a => a -> a -> Bool
>= a
n = Maybe a
forall a. Maybe a
Nothing
| Word8 -> Bool
isEndOfLine Word8
x = a -> Maybe a
forall a. a -> Maybe a
Just a
i
| Bool
otherwise = a -> Maybe a
forall a. a -> Maybe a
Just (a -> Maybe a) -> a -> Maybe a
forall a b. (a -> b) -> a -> b
$ a
i a -> a -> a
forall a. Num a => a -> a -> a
+ a
1
{-# INLINE parseFastqC' #-}
fastqToByteString :: Fastq -> B.ByteString
fastqToByteString :: Fastq -> ByteString
fastqToByteString (Fastq ByteString
a ByteString
b ByteString
c) = [ByteString] -> ByteString
B.concat [ByteString
"@", ByteString
a, ByteString
"\n", ByteString
b, ByteString
"\n+\n", ByteString
c]
{-# INLINE fastqToByteString #-}
qualitySummary :: Monad m => ConduitT Fastq o m [(Double, Double)]
qualitySummary :: ConduitT Fastq o m [(Double, Double)]
qualitySummary = (Fastq -> [Double]) -> ConduitT Fastq [Double] m ()
forall (m :: * -> *) a b. Monad m => (a -> b) -> ConduitT a b m ()
mapC ((Int -> Double) -> [Int] -> [Double]
forall a b. (a -> b) -> [a] -> [b]
map Int -> Double
forall a b. (Integral a, Num b) => a -> b
fromIntegral ([Int] -> [Double]) -> (Fastq -> [Int]) -> Fastq -> [Double]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Fastq -> [Int]
decodeQualSc) ConduitT Fastq [Double] m ()
-> ConduitM [Double] o m [(Double, Double)]
-> ConduitT Fastq o m [(Double, Double)]
forall (m :: * -> *) a b c r.
Monad m =>
ConduitM a b m () -> ConduitM b c m r -> ConduitM a c m r
.| ConduitM [Double] o m [(Double, Double)]
forall (m :: * -> *) o.
Monad m =>
ConduitT [Double] o m [(Double, Double)]
meanVarianceC
meanVarianceC :: Monad m => ConduitT [Double] o m [(Double, Double)]
meanVarianceC :: ConduitT [Double] o m [(Double, Double)]
meanVarianceC = ConduitT [Double] o m (Maybe [Double])
forall (m :: * -> *) a o. Monad m => ConduitT a o m (Maybe a)
peekC ConduitT [Double] o m (Maybe [Double])
-> (Maybe [Double] -> ConduitT [Double] o m [(Double, Double)])
-> ConduitT [Double] o m [(Double, Double)]
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= \case
Maybe [Double]
Nothing -> String -> ConduitT [Double] o m [(Double, Double)]
forall a. HasCallStack => String -> a
error String
"Empty input"
Just [Double]
x -> ([(Double, Double)], Int) -> [(Double, Double)]
forall a b. (a, b) -> a
fst (([(Double, Double)], Int) -> [(Double, Double)])
-> ConduitT [Double] o m ([(Double, Double)], Int)
-> ConduitT [Double] o m [(Double, Double)]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> (([(Double, Double)], Int)
-> [Double] -> ([(Double, Double)], Int))
-> ([(Double, Double)], Int)
-> ConduitT [Double] o m ([(Double, Double)], Int)
forall (m :: * -> *) a b o.
Monad m =>
(a -> b -> a) -> a -> ConduitT b o m a
foldlC ([(Double, Double)], Int) -> [Double] -> ([(Double, Double)], Int)
forall b b.
(Integral b, Fractional b) =>
([(b, b)], b) -> [b] -> ([(b, b)], b)
f (Int -> (Double, Double) -> [(Double, Double)]
forall a. Int -> a -> [a]
replicate ([Double] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length [Double]
x) (Double
0,Double
0), Int
0 :: Int)
where
f :: ([(b, b)], b) -> [b] -> ([(b, b)], b)
f ([(b, b)]
acc, b
n) [b]
xs = let acc' :: [(b, b)]
acc' = ((b, b) -> b -> (b, b)) -> [(b, b)] -> [b] -> [(b, b)]
forall a b c. (a -> b -> c) -> [a] -> [b] -> [c]
zipWith (b, b) -> b -> (b, b)
forall b. Fractional b => (b, b) -> b -> (b, b)
g [(b, b)]
acc [b]
xs in ([(b, b)]
acc', b
n')
where
n' :: b
n' = b
n b -> b -> b
forall a. Num a => a -> a -> a
+ b
1
g :: (b, b) -> b -> (b, b)
g (b
m, b
s) b
x = (b
m', b
s')
where
m' :: b
m' = b
m b -> b -> b
forall a. Num a => a -> a -> a
+ b
d b -> b -> b
forall a. Fractional a => a -> a -> a
/ b -> b
forall a b. (Integral a, Num b) => a -> b
fromIntegral b
n'
s' :: b
s' = b
s b -> b -> b
forall a. Num a => a -> a -> a
+ b
d b -> b -> b
forall a. Num a => a -> a -> a
* (b
x b -> b -> b
forall a. Num a => a -> a -> a
- b
m')
d :: b
d = b
x b -> b -> b
forall a. Num a => a -> a -> a
- b
m
{-# INLINE meanVarianceC #-}
decodeQualSc :: Fastq -> [Int]
decodeQualSc :: Fastq -> [Int]
decodeQualSc = (Word8 -> Int) -> [Word8] -> [Int]
forall a b. (a -> b) -> [a] -> [b]
map (Word8 -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Word8 -> Int) -> (Word8 -> Word8) -> Word8 -> Int
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (\Word8
x -> Word8
x Word8 -> Word8 -> Word8
forall a. Num a => a -> a -> a
- Word8
33)) ([Word8] -> [Int]) -> (Fastq -> [Word8]) -> Fastq -> [Int]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ByteString -> [Word8]
BS.unpack (ByteString -> [Word8])
-> (Fastq -> ByteString) -> Fastq -> [Word8]
forall b c a. (b -> c) -> (a -> b) -> a -> c
.Fastq -> ByteString
fastqSeqQual
{-# INLINE decodeQualSc #-}
pError :: Int -> Double
pError :: Int -> Double
pError Int
x = Double
10 Double -> Double -> Double
forall a. Floating a => a -> a -> a
** (Double -> Double
forall a. Num a => a -> a
negate (Int -> Double
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
x) Double -> Double -> Double
forall a. Fractional a => a -> a -> a
/ Double
10)
{-# INLINE pError #-}
trimPolyA :: Int -> Fastq -> Fastq
trimPolyA :: Int -> Fastq -> Fastq
trimPolyA Int
n f :: Fastq
f@(Fastq ByteString
a ByteString
b ByteString
c)
| ByteString -> Int
B.length ByteString
trailing Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
n = ByteString -> ByteString -> ByteString -> Fastq
Fastq ByteString
a ByteString
b' (ByteString -> Fastq) -> ByteString -> Fastq
forall a b. (a -> b) -> a -> b
$ Int -> ByteString -> ByteString
B.take (ByteString -> Int
B.length ByteString
b') ByteString
c
| Bool
otherwise = Fastq
f
where
(ByteString
b', ByteString
trailing) = (Char -> Bool) -> ByteString -> (ByteString, ByteString)
B.spanEnd (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
==Char
'A') ByteString
b