module SequenceFormats.Bed where

import           SequenceFormats.Utils            (Chrom (..), consumeProducer,
                                                   readFileProd)

import           Control.Monad.Trans.Class        (lift)
import qualified Data.Attoparsec.ByteString.Char8 as A
import           Data.Char                        (isSpace)
import           Pipes                            (Producer, next, yield)
import           Pipes.Safe                       (MonadSafe)

data BedEntry = BedEntry Chrom Int Int
    deriving (Int -> BedEntry -> ShowS
[BedEntry] -> ShowS
BedEntry -> String
(Int -> BedEntry -> ShowS)
-> (BedEntry -> String) -> ([BedEntry] -> ShowS) -> Show BedEntry
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [BedEntry] -> ShowS
$cshowList :: [BedEntry] -> ShowS
show :: BedEntry -> String
$cshow :: BedEntry -> String
showsPrec :: Int -> BedEntry -> ShowS
$cshowsPrec :: Int -> BedEntry -> ShowS
Show, BedEntry -> BedEntry -> Bool
(BedEntry -> BedEntry -> Bool)
-> (BedEntry -> BedEntry -> Bool) -> Eq BedEntry
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: BedEntry -> BedEntry -> Bool
$c/= :: BedEntry -> BedEntry -> Bool
== :: BedEntry -> BedEntry -> Bool
$c== :: BedEntry -> BedEntry -> Bool
Eq)

bedFileParser :: A.Parser BedEntry
bedFileParser :: Parser BedEntry
bedFileParser = Chrom -> Int -> Int -> BedEntry
BedEntry (Chrom -> Int -> Int -> BedEntry)
-> Parser ByteString Chrom
-> Parser ByteString (Int -> Int -> BedEntry)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Parser ByteString Chrom
chrom Parser ByteString (Int -> Int -> BedEntry)
-> Parser ByteString ()
-> Parser ByteString (Int -> Int -> BedEntry)
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* Parser ByteString ()
A.skipSpace Parser ByteString (Int -> Int -> BedEntry)
-> Parser ByteString Int -> Parser ByteString (Int -> BedEntry)
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> Parser ByteString Int
forall a. Integral a => Parser a
A.decimal Parser ByteString (Int -> BedEntry)
-> Parser ByteString () -> Parser ByteString (Int -> BedEntry)
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* Parser ByteString ()
A.skipSpace Parser ByteString (Int -> BedEntry)
-> Parser ByteString Int -> Parser BedEntry
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> Parser ByteString Int
forall a. Integral a => Parser a
A.decimal Parser BedEntry -> Parser ByteString () -> Parser BedEntry
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* Parser ByteString ()
A.endOfLine
  where
    chrom :: Parser ByteString Chrom
chrom = ByteString -> Chrom
Chrom (ByteString -> Chrom)
-> Parser ByteString ByteString -> Parser ByteString Chrom
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> (Char -> Bool) -> Parser ByteString ByteString
A.takeTill Char -> Bool
isSpace

readBedFile :: (MonadSafe m) => FilePath -> Producer BedEntry m ()
readBedFile :: String -> Producer BedEntry m ()
readBedFile String
bedFile = Parser BedEntry
-> Producer ByteString m () -> Producer BedEntry m ()
forall (m :: * -> *) a.
MonadThrow m =>
Parser a -> Producer ByteString m () -> Producer a m ()
consumeProducer Parser BedEntry
bedFileParser (String -> Producer ByteString m ()
forall (m :: * -> *).
MonadSafe m =>
String -> Producer ByteString m ()
readFileProd String
bedFile)

data IntervalStatus = BedBehind
    | BedOn
    | BedAhead

filterThroughBed :: (Monad m) => Producer BedEntry m () -> (a -> (Chrom, Int)) -> Producer a m () -> Producer a m ()
filterThroughBed :: Producer BedEntry m ()
-> (a -> (Chrom, Int)) -> Producer a m () -> Producer a m ()
filterThroughBed Producer BedEntry m ()
bedProd a -> (Chrom, Int)
posFunc Producer a m ()
gProd = do
    Either () (BedEntry, Producer BedEntry m ())
b <- m (Either () (BedEntry, Producer BedEntry m ()))
-> Proxy X () () a m (Either () (BedEntry, Producer BedEntry m ()))
forall (t :: (* -> *) -> * -> *) (m :: * -> *) a.
(MonadTrans t, Monad m) =>
m a -> t m a
lift (m (Either () (BedEntry, Producer BedEntry m ()))
 -> Proxy
      X () () a m (Either () (BedEntry, Producer BedEntry m ())))
-> m (Either () (BedEntry, Producer BedEntry m ()))
-> Proxy X () () a m (Either () (BedEntry, Producer BedEntry m ()))
forall a b. (a -> b) -> a -> b
$ Producer BedEntry m ()
-> m (Either () (BedEntry, Producer BedEntry m ()))
forall (m :: * -> *) a r.
Monad m =>
Producer a m r -> m (Either r (a, Producer a m r))
next Producer BedEntry m ()
bedProd
    let (BedEntry
bedCurrent, Producer BedEntry m ()
bedRest) = case Either () (BedEntry, Producer BedEntry m ())
b of
            Left ()
_  -> String -> (BedEntry, Producer BedEntry m ())
forall a. HasCallStack => String -> a
error String
"Bed file empty or not readable"
            Right (BedEntry, Producer BedEntry m ())
r -> (BedEntry, Producer BedEntry m ())
r
    Either () (a, Producer a m ())
f' <- m (Either () (a, Producer a m ()))
-> Proxy X () () a m (Either () (a, Producer a m ()))
forall (t :: (* -> *) -> * -> *) (m :: * -> *) a.
(MonadTrans t, Monad m) =>
m a -> t m a
lift (m (Either () (a, Producer a m ()))
 -> Proxy X () () a m (Either () (a, Producer a m ())))
-> m (Either () (a, Producer a m ()))
-> Proxy X () () a m (Either () (a, Producer a m ()))
forall a b. (a -> b) -> a -> b
$ Producer a m () -> m (Either () (a, Producer a m ()))
forall (m :: * -> *) a r.
Monad m =>
Producer a m r -> m (Either r (a, Producer a m r))
next Producer a m ()
gProd
    let (a
gCurrent, Producer a m ()
gRest) = case Either () (a, Producer a m ())
f' of
            Left ()
_  -> String -> (a, Producer a m ())
forall a. HasCallStack => String -> a
error String
"Genomic stream empty or not readable"
            Right (a, Producer a m ())
r -> (a, Producer a m ())
r
    BedEntry
-> a
-> Producer BedEntry m ()
-> Producer a m ()
-> Producer a m ()
forall (m :: * -> *) x' x.
Monad m =>
BedEntry
-> a
-> Proxy X () () BedEntry m ()
-> Proxy X () () a m ()
-> Proxy x' x () a m ()
go BedEntry
bedCurrent a
gCurrent Producer BedEntry m ()
bedRest Producer a m ()
gRest
  where
    go :: BedEntry
-> a
-> Proxy X () () BedEntry m ()
-> Proxy X () () a m ()
-> Proxy x' x () a m ()
go BedEntry
bedCurrent a
gCurrent Proxy X () () BedEntry m ()
bedRest Proxy X () () a m ()
gRest = do
        let recurseNextBed :: Proxy x' x () a m ()
recurseNextBed = do
                Either () (BedEntry, Proxy X () () BedEntry m ())
b <- m (Either () (BedEntry, Proxy X () () BedEntry m ()))
-> Proxy
     x' x () a m (Either () (BedEntry, Proxy X () () BedEntry m ()))
forall (t :: (* -> *) -> * -> *) (m :: * -> *) a.
(MonadTrans t, Monad m) =>
m a -> t m a
lift (m (Either () (BedEntry, Proxy X () () BedEntry m ()))
 -> Proxy
      x' x () a m (Either () (BedEntry, Proxy X () () BedEntry m ())))
-> m (Either () (BedEntry, Proxy X () () BedEntry m ()))
-> Proxy
     x' x () a m (Either () (BedEntry, Proxy X () () BedEntry m ()))
forall a b. (a -> b) -> a -> b
$ Proxy X () () BedEntry m ()
-> m (Either () (BedEntry, Proxy X () () BedEntry m ()))
forall (m :: * -> *) a r.
Monad m =>
Producer a m r -> m (Either r (a, Producer a m r))
next Proxy X () () BedEntry m ()
bedRest
                case Either () (BedEntry, Proxy X () () BedEntry m ())
b of
                    Left () -> () -> Proxy x' x () a m ()
forall (m :: * -> *) a. Monad m => a -> m a
return ()
                    Right (BedEntry
nextBed, Proxy X () () BedEntry m ()
bedRest') -> BedEntry
-> a
-> Proxy X () () BedEntry m ()
-> Proxy X () () a m ()
-> Proxy x' x () a m ()
go BedEntry
nextBed a
gCurrent Proxy X () () BedEntry m ()
bedRest' Proxy X () () a m ()
gRest
            recurseNextG :: Proxy x' x () a m ()
recurseNextG = do
                Either () (a, Proxy X () () a m ())
f' <- m (Either () (a, Proxy X () () a m ()))
-> Proxy x' x () a m (Either () (a, Proxy X () () a m ()))
forall (t :: (* -> *) -> * -> *) (m :: * -> *) a.
(MonadTrans t, Monad m) =>
m a -> t m a
lift (m (Either () (a, Proxy X () () a m ()))
 -> Proxy x' x () a m (Either () (a, Proxy X () () a m ())))
-> m (Either () (a, Proxy X () () a m ()))
-> Proxy x' x () a m (Either () (a, Proxy X () () a m ()))
forall a b. (a -> b) -> a -> b
$ Proxy X () () a m () -> m (Either () (a, Proxy X () () a m ()))
forall (m :: * -> *) a r.
Monad m =>
Producer a m r -> m (Either r (a, Producer a m r))
next Proxy X () () a m ()
gRest
                case Either () (a, Proxy X () () a m ())
f' of
                    Left () -> () -> Proxy x' x () a m ()
forall (m :: * -> *) a. Monad m => a -> m a
return ()
                    Right (a
nextG, Proxy X () () a m ()
gRest') -> BedEntry
-> a
-> Proxy X () () BedEntry m ()
-> Proxy X () () a m ()
-> Proxy x' x () a m ()
go BedEntry
bedCurrent a
nextG Proxy X () () BedEntry m ()
bedRest Proxy X () () a m ()
gRest'
        case BedEntry
bedCurrent BedEntry -> a -> IntervalStatus
`checkIntervalStatus` a
gCurrent of
            IntervalStatus
BedBehind -> Proxy x' x () a m ()
recurseNextBed
            IntervalStatus
BedAhead -> Proxy x' x () a m ()
recurseNextG
            IntervalStatus
BedOn -> do
                a -> Proxy x' x () a m ()
forall (m :: * -> *) a x' x. Functor m => a -> Proxy x' x () a m ()
yield a
gCurrent
                Proxy x' x () a m ()
recurseNextG
    -- checkIntervalStatus :: BedEntry -> a -> IntervalStatus
    checkIntervalStatus :: BedEntry -> a -> IntervalStatus
checkIntervalStatus (BedEntry Chrom
bedChrom Int
bedStart Int
bedEnd) a
g =
        case Chrom
bedChrom Chrom -> Chrom -> Ordering
forall a. Ord a => a -> a -> Ordering
`compare` ((Chrom, Int) -> Chrom
forall a b. (a, b) -> a
fst ((Chrom, Int) -> Chrom) -> (a -> (Chrom, Int)) -> a -> Chrom
forall b c a. (b -> c) -> (a -> b) -> a -> c
. a -> (Chrom, Int)
posFunc) a
g of
            Ordering
LT -> IntervalStatus
BedBehind
            Ordering
GT -> IntervalStatus
BedAhead
            Ordering
EQ -> if Int
bedStart Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1 Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
> ((Chrom, Int) -> Int
forall a b. (a, b) -> b
snd ((Chrom, Int) -> Int) -> (a -> (Chrom, Int)) -> a -> Int
forall b c a. (b -> c) -> (a -> b) -> a -> c
. a -> (Chrom, Int)
posFunc) a
g then
                      IntervalStatus
BedAhead
                  else
                      if Int
bedEnd Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< ((Chrom, Int) -> Int
forall a b. (a, b) -> b
snd ((Chrom, Int) -> Int) -> (a -> (Chrom, Int)) -> a -> Int
forall b c a. (b -> c) -> (a -> b) -> a -> c
. a -> (Chrom, Int)
posFunc) a
g then IntervalStatus
BedBehind else IntervalStatus
BedOn