{-# LANGUAGE OverloadedStrings #-}
module SequenceFormats.Utils (liftParsingErrors,
consumeProducer, readFileProd,
SeqFormatException(..),
Chrom(..), word) where
import Control.Error (readErr)
import Control.Exception (Exception, throw)
import Control.Monad.Catch (MonadThrow, throwM)
import Control.Monad.Trans.Class (lift)
import qualified Data.Attoparsec.ByteString.Char8 as A
import qualified Data.ByteString.Char8 as B
import Data.Char (isSpace)
import Pipes (Producer, next)
import Pipes.Attoparsec (ParsingError(..), parsed)
import qualified Pipes.ByteString as PB
import qualified Pipes.Safe as PS
import qualified Pipes.Safe.Prelude as PS
import System.IO (IOMode(..))
data SeqFormatException = SeqFormatException String
deriving (Int -> SeqFormatException -> ShowS
[SeqFormatException] -> ShowS
SeqFormatException -> String
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [SeqFormatException] -> ShowS
$cshowList :: [SeqFormatException] -> ShowS
show :: SeqFormatException -> String
$cshow :: SeqFormatException -> String
showsPrec :: Int -> SeqFormatException -> ShowS
$cshowsPrec :: Int -> SeqFormatException -> ShowS
Show, SeqFormatException -> SeqFormatException -> Bool
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: SeqFormatException -> SeqFormatException -> Bool
$c/= :: SeqFormatException -> SeqFormatException -> Bool
== :: SeqFormatException -> SeqFormatException -> Bool
$c== :: SeqFormatException -> SeqFormatException -> Bool
Eq)
instance Exception SeqFormatException
newtype Chrom = Chrom {Chrom -> ByteString
unChrom :: B.ByteString} deriving (Chrom -> Chrom -> Bool
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: Chrom -> Chrom -> Bool
$c/= :: Chrom -> Chrom -> Bool
== :: Chrom -> Chrom -> Bool
$c== :: Chrom -> Chrom -> Bool
Eq)
instance Show Chrom where
show :: Chrom -> String
show (Chrom ByteString
c) = ByteString -> String
B.unpack ByteString
c
instance Ord Chrom where
compare :: Chrom -> Chrom -> Ordering
compare (Chrom ByteString
c1) (Chrom ByteString
c2) =
let [ByteString
c1NoChr, ByteString
c2NoChr] = forall a b. (a -> b) -> [a] -> [b]
map ByteString -> ByteString
removeChr [ByteString
c1, ByteString
c2]
[ByteString
c1XYMTconvert, ByteString
c2XYMTconvert] = forall a b. (a -> b) -> [a] -> [b]
map ByteString -> ByteString
convertXYMT [ByteString
c1NoChr, ByteString
c2NoChr]
in case (,) forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> ByteString -> Either SeqFormatException Int
readChrom ByteString
c1XYMTconvert forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> ByteString -> Either SeqFormatException Int
readChrom ByteString
c2XYMTconvert of
Left SeqFormatException
e -> forall a e. Exception e => e -> a
throw SeqFormatException
e
Right (Int
cn1, Int
cn2) -> Int
cn1 forall a. Ord a => a -> a -> Ordering
`compare` Int
cn2
where
removeChr :: B.ByteString -> B.ByteString
removeChr :: ByteString -> ByteString
removeChr ByteString
c = if Int -> ByteString -> ByteString
B.take Int
3 ByteString
c forall a. Eq a => a -> a -> Bool
== ByteString
"chr" then Int -> ByteString -> ByteString
B.drop Int
3 ByteString
c else ByteString
c
convertXYMT :: B.ByteString -> B.ByteString
convertXYMT :: ByteString -> ByteString
convertXYMT ByteString
c = case ByteString
c of
ByteString
"X" -> ByteString
"23"
ByteString
"Y" -> ByteString
"24"
ByteString
"MT" -> ByteString
"90"
ByteString
n -> ByteString
n
readChrom :: B.ByteString -> Either SeqFormatException Int
readChrom :: ByteString -> Either SeqFormatException Int
readChrom ByteString
c = forall a e. Read a => e -> String -> Either e a
readErr (String -> SeqFormatException
SeqFormatException forall a b. (a -> b) -> a -> b
$ String
"cannot parse chromosome " forall a. [a] -> [a] -> [a]
++ ByteString -> String
B.unpack ByteString
c) forall b c a. (b -> c) -> (a -> b) -> a -> c
. ByteString -> String
B.unpack forall a b. (a -> b) -> a -> b
$ ByteString
c
liftParsingErrors :: (MonadThrow m) =>
Either (ParsingError, Producer B.ByteString m r) () -> Producer a m ()
liftParsingErrors :: forall (m :: * -> *) r a.
MonadThrow m =>
Either (ParsingError, Producer ByteString m r) ()
-> Producer a m ()
liftParsingErrors Either (ParsingError, Producer ByteString m r) ()
res = case Either (ParsingError, Producer ByteString m r) ()
res of
Left (ParsingError [String]
_ String
msg, Producer ByteString m r
restProd) -> do
Either r (ByteString, Producer ByteString m r)
x <- forall (t :: (* -> *) -> * -> *) (m :: * -> *) a.
(MonadTrans t, Monad m) =>
m a -> t m a
lift forall a b. (a -> b) -> a -> b
$ forall (m :: * -> *) a r.
Monad m =>
Producer a m r -> m (Either r (a, Producer a m r))
next Producer ByteString m r
restProd
case Either r (ByteString, Producer ByteString m r)
x of
Right (ByteString
chunk, Producer ByteString m r
_) -> do
let msg' :: String
msg' = String
"Error while parsing: " forall a. Semigroup a => a -> a -> a
<> String
msg forall a. Semigroup a => a -> a -> a
<> String
". Error occurred when trying to parse this chunk: " forall a. [a] -> [a] -> [a]
++ forall a. Show a => a -> String
show ByteString
chunk
forall (m :: * -> *) e a. (MonadThrow m, Exception e) => e -> m a
throwM forall a b. (a -> b) -> a -> b
$ String -> SeqFormatException
SeqFormatException String
msg'
Left r
_ -> forall a. HasCallStack => String -> a
error String
"should not happen"
Right () -> forall (m :: * -> *) a. Monad m => a -> m a
return ()
consumeProducer :: (MonadThrow m) => A.Parser a -> Producer B.ByteString m () -> Producer a m ()
consumeProducer :: forall (m :: * -> *) a.
MonadThrow m =>
Parser a -> Producer ByteString m () -> Producer a m ()
consumeProducer Parser a
parser Producer ByteString m ()
prod = forall (m :: * -> *) a b r.
(Monad m, ParserInput a) =>
Parser a b
-> Producer a m r
-> Producer b m (Either (ParsingError, Producer a m r) r)
parsed Parser a
parser Producer ByteString m ()
prod forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= forall (m :: * -> *) r a.
MonadThrow m =>
Either (ParsingError, Producer ByteString m r) ()
-> Producer a m ()
liftParsingErrors
readFileProd :: (PS.MonadSafe m) => FilePath -> Producer B.ByteString m ()
readFileProd :: forall (m :: * -> *).
MonadSafe m =>
String -> Producer ByteString m ()
readFileProd String
f = forall (m :: * -> *) r.
MonadSafe m =>
String -> IOMode -> (Handle -> m r) -> m r
PS.withFile String
f IOMode
ReadMode (\Handle
h -> forall (m :: * -> *).
MonadIO m =>
Handle -> Producer' ByteString m ()
PB.fromHandle Handle
h)
word :: A.Parser B.ByteString
word :: Parser ByteString
word = (Char -> Bool) -> Parser ByteString
A.takeTill Char -> Bool
isSpace