{-# LANGUAGE OverloadedStrings #-} -- |This module contains helper functions for file parsing. module SequenceFormats.Utils (liftParsingErrors, consumeProducer, readFileProd, SeqFormatException(..), Chrom(..), word) where import Control.Error (readErr) import Control.Exception (Exception, throw) import Control.Monad.Catch (MonadThrow, throwM) import Control.Monad.Trans.Class (lift) import qualified Data.Attoparsec.ByteString.Char8 as A import qualified Data.ByteString.Char8 as B import Data.Char (isSpace) import Pipes (Producer, next) import Pipes.Attoparsec (ParsingError(..), parsed) import qualified Pipes.ByteString as PB import qualified Pipes.Safe as PS import qualified Pipes.Safe.Prelude as PS import System.IO (IOMode(..)) -- |An exception type for parsing BioInformatic file formats. data SeqFormatException = SeqFormatException String deriving (Show, Eq) instance Exception SeqFormatException -- |A wrapper datatype for Chromosome names. newtype Chrom = Chrom {unChrom :: B.ByteString} deriving (Eq) -- |Show instance for Chrom instance Show Chrom where show (Chrom c) = B.unpack c -- |Ord instance for Chrom instance Ord Chrom where compare (Chrom c1) (Chrom c2) = let [c1NoChr, c2NoChr] = map removeChr [c1, c2] [c1XYMTconvert, c2XYMTconvert] = map convertXYMT [c1NoChr, c2NoChr] in case (,) <$> readChrom c1XYMTconvert <*> readChrom c2XYMTconvert of Left e -> throw e Right (cn1, cn2) -> cn1 `compare` cn2 where removeChr :: B.ByteString -> B.ByteString removeChr c = if B.take 3 c == "chr" then B.drop 3 c else c convertXYMT :: B.ByteString -> B.ByteString convertXYMT c = case c of "X" -> "23" "Y" -> "24" "MT" -> "90" n -> n readChrom :: B.ByteString -> Either SeqFormatException Int readChrom c = readErr (SeqFormatException $ "cannot parse chromosome " ++ B.unpack c) . B.unpack $ c -- |A function to help with reporting parsing errors to stderr. Returns a clean Producer over the -- parsed datatype. liftParsingErrors :: (MonadThrow m) => Either (ParsingError, Producer B.ByteString m r) () -> Producer a m () liftParsingErrors res = case res of Left (ParsingError _ msg, restProd) -> do x <- lift $ next restProd case x of Right (chunk, _) -> do let msg' = "Error while parsing: " <> msg <> ". Error occurred when trying to parse this chunk: " ++ show chunk throwM $ SeqFormatException msg' Left _ -> error "should not happen" Right () -> return () -- |A helper function to parse a text producer, properly reporting all errors to stderr. consumeProducer :: (MonadThrow m) => A.Parser a -> Producer B.ByteString m () -> Producer a m () consumeProducer parser prod = parsed parser prod >>= liftParsingErrors readFileProd :: (PS.MonadSafe m) => FilePath -> Producer B.ByteString m () readFileProd f = PS.withFile f ReadMode PB.fromHandle word :: A.Parser B.ByteString word = A.takeTill isSpace