{-# LANGUAGE OverloadedStrings #-}

-- |This module contains helper functions for file parsing.

module SequenceFormats.Utils (liftParsingErrors,
                              consumeProducer, readFileProd,
                              SeqFormatException(..),
                              Chrom(..), word) where

import Control.Error (readErr)
import Control.Exception (Exception, throw)
import Control.Monad.Catch (MonadThrow, throwM)
import Control.Monad.Trans.Class (lift)
import qualified Data.Attoparsec.ByteString.Char8 as A
import qualified Data.ByteString.Char8 as B
import Data.Char (isSpace)
import Pipes (Producer, next)
import Pipes.Attoparsec (ParsingError(..), parsed)
import qualified Pipes.ByteString as PB
import qualified Pipes.Safe as PS
import qualified Pipes.Safe.Prelude as PS
import System.IO (IOMode(..))

-- |An exception type for parsing BioInformatic file formats.
data SeqFormatException = SeqFormatException String
    deriving (Int -> SeqFormatException -> ShowS
[SeqFormatException] -> ShowS
SeqFormatException -> String
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [SeqFormatException] -> ShowS
$cshowList :: [SeqFormatException] -> ShowS
show :: SeqFormatException -> String
$cshow :: SeqFormatException -> String
showsPrec :: Int -> SeqFormatException -> ShowS
$cshowsPrec :: Int -> SeqFormatException -> ShowS
Show, SeqFormatException -> SeqFormatException -> Bool
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: SeqFormatException -> SeqFormatException -> Bool
$c/= :: SeqFormatException -> SeqFormatException -> Bool
== :: SeqFormatException -> SeqFormatException -> Bool
$c== :: SeqFormatException -> SeqFormatException -> Bool
Eq)

instance Exception SeqFormatException

-- |A wrapper datatype for Chromosome names.
newtype Chrom = Chrom {Chrom -> ByteString
unChrom :: B.ByteString} deriving (Chrom -> Chrom -> Bool
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: Chrom -> Chrom -> Bool
$c/= :: Chrom -> Chrom -> Bool
== :: Chrom -> Chrom -> Bool
$c== :: Chrom -> Chrom -> Bool
Eq)

-- |Show instance for Chrom
instance Show Chrom where
    show :: Chrom -> String
show (Chrom ByteString
c) = ByteString -> String
B.unpack ByteString
c

-- |Ord instance for Chrom
instance Ord Chrom where
    compare :: Chrom -> Chrom -> Ordering
compare (Chrom ByteString
c1) (Chrom ByteString
c2) = 
        let [ByteString
c1NoChr, ByteString
c2NoChr] = forall a b. (a -> b) -> [a] -> [b]
map ByteString -> ByteString
removeChr [ByteString
c1, ByteString
c2]
            [ByteString
c1XYMTconvert, ByteString
c2XYMTconvert] = forall a b. (a -> b) -> [a] -> [b]
map ByteString -> ByteString
convertXYMT [ByteString
c1NoChr, ByteString
c2NoChr]
        in  case (,) forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> ByteString -> Either SeqFormatException Int
readChrom ByteString
c1XYMTconvert forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> ByteString -> Either SeqFormatException Int
readChrom ByteString
c2XYMTconvert of
                Left SeqFormatException
e -> forall a e. Exception e => e -> a
throw SeqFormatException
e
                Right (Int
cn1, Int
cn2) -> Int
cn1 forall a. Ord a => a -> a -> Ordering
`compare` Int
cn2
      where
        removeChr :: B.ByteString -> B.ByteString
        removeChr :: ByteString -> ByteString
removeChr ByteString
c = if Int -> ByteString -> ByteString
B.take Int
3 ByteString
c forall a. Eq a => a -> a -> Bool
== ByteString
"chr" then Int -> ByteString -> ByteString
B.drop Int
3 ByteString
c else ByteString
c
        convertXYMT :: B.ByteString -> B.ByteString
        convertXYMT :: ByteString -> ByteString
convertXYMT ByteString
c = case ByteString
c of
            ByteString
"X"  -> ByteString
"23"
            ByteString
"Y"  -> ByteString
"24"
            ByteString
"MT" -> ByteString
"90"
            ByteString
n    -> ByteString
n
        readChrom :: B.ByteString -> Either SeqFormatException Int
        readChrom :: ByteString -> Either SeqFormatException Int
readChrom ByteString
c = forall a e. Read a => e -> String -> Either e a
readErr (String -> SeqFormatException
SeqFormatException forall a b. (a -> b) -> a -> b
$ String
"cannot parse chromosome " forall a. [a] -> [a] -> [a]
++ ByteString -> String
B.unpack ByteString
c) forall b c a. (b -> c) -> (a -> b) -> a -> c
. ByteString -> String
B.unpack forall a b. (a -> b) -> a -> b
$ ByteString
c

-- |A function to help with reporting parsing errors to stderr. Returns a clean Producer over the 
-- parsed datatype.
liftParsingErrors :: (MonadThrow m) =>
    Either (ParsingError, Producer B.ByteString m r) () -> Producer a m ()
liftParsingErrors :: forall (m :: * -> *) r a.
MonadThrow m =>
Either (ParsingError, Producer ByteString m r) ()
-> Producer a m ()
liftParsingErrors Either (ParsingError, Producer ByteString m r) ()
res = case Either (ParsingError, Producer ByteString m r) ()
res of
    Left (ParsingError [String]
_ String
msg, Producer ByteString m r
restProd) -> do
        Either r (ByteString, Producer ByteString m r)
x <- forall (t :: (* -> *) -> * -> *) (m :: * -> *) a.
(MonadTrans t, Monad m) =>
m a -> t m a
lift forall a b. (a -> b) -> a -> b
$ forall (m :: * -> *) a r.
Monad m =>
Producer a m r -> m (Either r (a, Producer a m r))
next Producer ByteString m r
restProd
        case Either r (ByteString, Producer ByteString m r)
x of
            Right (ByteString
chunk, Producer ByteString m r
_) -> do
                let msg' :: String
msg' = String
"Error while parsing: " forall a. Semigroup a => a -> a -> a
<> String
msg forall a. Semigroup a => a -> a -> a
<> String
". Error occurred when trying to parse this chunk: " forall a. [a] -> [a] -> [a]
++ forall a. Show a => a -> String
show ByteString
chunk
                forall (m :: * -> *) e a. (MonadThrow m, Exception e) => e -> m a
throwM forall a b. (a -> b) -> a -> b
$ String -> SeqFormatException
SeqFormatException String
msg'
            Left r
_ -> forall a. HasCallStack => String -> a
error String
"should not happen"
    Right () -> forall (m :: * -> *) a. Monad m => a -> m a
return ()

-- |A helper function to parse a text producer, properly reporting all errors to stderr.
consumeProducer :: (MonadThrow m) => A.Parser a -> Producer B.ByteString m () -> Producer a m ()
consumeProducer :: forall (m :: * -> *) a.
MonadThrow m =>
Parser a -> Producer ByteString m () -> Producer a m ()
consumeProducer Parser a
parser Producer ByteString m ()
prod = forall (m :: * -> *) a b r.
(Monad m, ParserInput a) =>
Parser a b
-> Producer a m r
-> Producer b m (Either (ParsingError, Producer a m r) r)
parsed Parser a
parser Producer ByteString m ()
prod forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= forall (m :: * -> *) r a.
MonadThrow m =>
Either (ParsingError, Producer ByteString m r) ()
-> Producer a m ()
liftParsingErrors

readFileProd :: (PS.MonadSafe m) => FilePath -> Producer B.ByteString m ()
readFileProd :: forall (m :: * -> *).
MonadSafe m =>
String -> Producer ByteString m ()
readFileProd String
f = forall (m :: * -> *) r.
MonadSafe m =>
String -> IOMode -> (Handle -> m r) -> m r
PS.withFile String
f IOMode
ReadMode (\Handle
h -> forall (m :: * -> *).
MonadIO m =>
Handle -> Producer' ByteString m ()
PB.fromHandle Handle
h)

word :: A.Parser B.ByteString
word :: Parser ByteString
word = (Char -> Bool) -> Parser ByteString
A.takeTill Char -> Bool
isSpace