module Bio.Sequence.FastQ
(
readFastQ, hReadFastQ, parse
, writeFastQ, hWriteFastQ
, readSangerQ, hReadSangerQ
, writeSangerQ, hWriteSangerQ
, readIllumina, hReadIllumina
, writeIllumina, hWriteIllumina
) where
import System.IO
import qualified Data.ByteString.Lazy.Char8 as B
import qualified Data.ByteString.Lazy as BB
import Data.List (unfoldr)
import Bio.Core.Sequence
data Sequence = Seq SeqLabel SeqData QualData
instance BioSeq Sequence where
seqlabel (Seq sl _ _) = sl
seqdata (Seq _ sd _) = sd
seqlength = Offset . B.length . unSD . seqdata
instance BioSeqQual Sequence where
seqqual (Seq _ _ q) = q
readSangerQ, readIllumina :: FilePath -> IO [Sequence]
readSangerQ = readFastQ
readIllumina f = addQual (negate 31) `fmap` readFastQ f
hReadSangerQ, hReadIllumina :: Handle -> IO [Sequence]
hReadSangerQ = hReadFastQ
hReadIllumina h = addQual (negate 31) `fmap` hReadFastQ h
writeSangerQ, writeIllumina :: FilePath -> [Sequence] -> IO ()
writeSangerQ = writeFastQ
writeIllumina f = writeFastQ f . addQual 31
hWriteSangerQ, hWriteIllumina :: Handle -> [Sequence] -> IO ()
hWriteSangerQ = hWriteFastQ
hWriteIllumina h = hWriteFastQ h . addQual 31
addQual :: Qual -> [Sequence] -> [Sequence]
addQual (Qual q) = map (\(Seq h d mq) -> (Seq h d $ qmap (+q) mq))
where qmap f (QualData qd) = QualData (BB.map f qd)
readFastQ :: FilePath -> IO [Sequence]
readFastQ f = (go . B.lines) `fmap` B.readFile f
hReadFastQ :: Handle -> IO [Sequence]
hReadFastQ h = (go . B.lines) `fmap` B.hGetContents h
go :: [B.ByteString] -> [Sequence]
go = map (either error id) . unfoldr parse
parse :: [B.ByteString] -> Maybe (Either String (Sequence), [B.ByteString])
parse (h1:sd:h2:sq:rest) =
case (B.uncons h1,B.uncons h2) of
(Just ('@',h1name), Just ('+',h2name))
| h1name == h2name || B.null h2name
-> Just (Right $ Seq (SeqLabel h1name) (SeqData sd) (QualData (BB.map (subtract 33) sq)), rest)
| otherwise
-> Just (Left $ "Bio.Sequence.FastQ: name mismatch:" ++ showStanza, rest)
_ -> Just (Left $ "Bio.Sequence.FastQ: illegal FastQ format:" ++ showStanza, rest)
where showStanza = unlines $ map B.unpack [ h1, sd, h2, sq ]
parse [] = Nothing
parse fs = let showStanza = unlines (map B.unpack fs)
err = Left $ "Bio.Sequence.FastQ: illegal number of lines in FastQ format: " ++ showStanza
in Just (err, [])
writeFastQ :: FilePath -> [Sequence] -> IO ()
writeFastQ f = B.writeFile f . B.concat . map toFastQ
hWriteFastQ :: Handle -> [Sequence] -> IO ()
hWriteFastQ h = B.hPut h . B.concat . map toFastQ