module SequenceFormats.Bed where
import SequenceFormats.Utils (Chrom (..), consumeProducer,
readFileProd)
import Control.Monad.Trans.Class (lift)
import qualified Data.Attoparsec.ByteString.Char8 as A
import Data.Char (isSpace)
import Pipes (Producer, next, yield)
import Pipes.Safe (MonadSafe)
data BedEntry = BedEntry Chrom Int Int
deriving (Int -> BedEntry -> ShowS
[BedEntry] -> ShowS
BedEntry -> String
(Int -> BedEntry -> ShowS)
-> (BedEntry -> String) -> ([BedEntry] -> ShowS) -> Show BedEntry
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [BedEntry] -> ShowS
$cshowList :: [BedEntry] -> ShowS
show :: BedEntry -> String
$cshow :: BedEntry -> String
showsPrec :: Int -> BedEntry -> ShowS
$cshowsPrec :: Int -> BedEntry -> ShowS
Show, BedEntry -> BedEntry -> Bool
(BedEntry -> BedEntry -> Bool)
-> (BedEntry -> BedEntry -> Bool) -> Eq BedEntry
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: BedEntry -> BedEntry -> Bool
$c/= :: BedEntry -> BedEntry -> Bool
== :: BedEntry -> BedEntry -> Bool
$c== :: BedEntry -> BedEntry -> Bool
Eq)
bedFileParser :: A.Parser BedEntry
bedFileParser :: Parser BedEntry
bedFileParser = Chrom -> Int -> Int -> BedEntry
BedEntry (Chrom -> Int -> Int -> BedEntry)
-> Parser ByteString Chrom
-> Parser ByteString (Int -> Int -> BedEntry)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Parser ByteString Chrom
chrom Parser ByteString (Int -> Int -> BedEntry)
-> Parser ByteString ()
-> Parser ByteString (Int -> Int -> BedEntry)
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* Parser ByteString ()
A.skipSpace Parser ByteString (Int -> Int -> BedEntry)
-> Parser ByteString Int -> Parser ByteString (Int -> BedEntry)
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> Parser ByteString Int
forall a. Integral a => Parser a
A.decimal Parser ByteString (Int -> BedEntry)
-> Parser ByteString () -> Parser ByteString (Int -> BedEntry)
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* Parser ByteString ()
A.skipSpace Parser ByteString (Int -> BedEntry)
-> Parser ByteString Int -> Parser BedEntry
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> Parser ByteString Int
forall a. Integral a => Parser a
A.decimal Parser BedEntry -> Parser ByteString () -> Parser BedEntry
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* Parser ByteString ()
A.endOfLine
where
chrom :: Parser ByteString Chrom
chrom = ByteString -> Chrom
Chrom (ByteString -> Chrom)
-> Parser ByteString ByteString -> Parser ByteString Chrom
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> (Char -> Bool) -> Parser ByteString ByteString
A.takeTill Char -> Bool
isSpace
readBedFile :: (MonadSafe m) => FilePath -> Producer BedEntry m ()
readBedFile :: String -> Producer BedEntry m ()
readBedFile String
bedFile = Parser BedEntry
-> Producer ByteString m () -> Producer BedEntry m ()
forall (m :: * -> *) a.
MonadThrow m =>
Parser a -> Producer ByteString m () -> Producer a m ()
consumeProducer Parser BedEntry
bedFileParser (String -> Producer ByteString m ()
forall (m :: * -> *).
MonadSafe m =>
String -> Producer ByteString m ()
readFileProd String
bedFile)
data IntervalStatus = BedBehind
| BedOn
| BedAhead
filterThroughBed :: (Monad m) => Producer BedEntry m () -> (a -> (Chrom, Int)) -> Producer a m () -> Producer a m ()
filterThroughBed :: Producer BedEntry m ()
-> (a -> (Chrom, Int)) -> Producer a m () -> Producer a m ()
filterThroughBed Producer BedEntry m ()
bedProd a -> (Chrom, Int)
posFunc Producer a m ()
gProd = do
Either () (BedEntry, Producer BedEntry m ())
b <- m (Either () (BedEntry, Producer BedEntry m ()))
-> Proxy X () () a m (Either () (BedEntry, Producer BedEntry m ()))
forall (t :: (* -> *) -> * -> *) (m :: * -> *) a.
(MonadTrans t, Monad m) =>
m a -> t m a
lift (m (Either () (BedEntry, Producer BedEntry m ()))
-> Proxy
X () () a m (Either () (BedEntry, Producer BedEntry m ())))
-> m (Either () (BedEntry, Producer BedEntry m ()))
-> Proxy X () () a m (Either () (BedEntry, Producer BedEntry m ()))
forall a b. (a -> b) -> a -> b
$ Producer BedEntry m ()
-> m (Either () (BedEntry, Producer BedEntry m ()))
forall (m :: * -> *) a r.
Monad m =>
Producer a m r -> m (Either r (a, Producer a m r))
next Producer BedEntry m ()
bedProd
let (BedEntry
bedCurrent, Producer BedEntry m ()
bedRest) = case Either () (BedEntry, Producer BedEntry m ())
b of
Left ()
_ -> String -> (BedEntry, Producer BedEntry m ())
forall a. HasCallStack => String -> a
error String
"Bed file empty or not readable"
Right (BedEntry, Producer BedEntry m ())
r -> (BedEntry, Producer BedEntry m ())
r
Either () (a, Producer a m ())
f' <- m (Either () (a, Producer a m ()))
-> Proxy X () () a m (Either () (a, Producer a m ()))
forall (t :: (* -> *) -> * -> *) (m :: * -> *) a.
(MonadTrans t, Monad m) =>
m a -> t m a
lift (m (Either () (a, Producer a m ()))
-> Proxy X () () a m (Either () (a, Producer a m ())))
-> m (Either () (a, Producer a m ()))
-> Proxy X () () a m (Either () (a, Producer a m ()))
forall a b. (a -> b) -> a -> b
$ Producer a m () -> m (Either () (a, Producer a m ()))
forall (m :: * -> *) a r.
Monad m =>
Producer a m r -> m (Either r (a, Producer a m r))
next Producer a m ()
gProd
let (a
gCurrent, Producer a m ()
gRest) = case Either () (a, Producer a m ())
f' of
Left ()
_ -> String -> (a, Producer a m ())
forall a. HasCallStack => String -> a
error String
"Genomic stream empty or not readable"
Right (a, Producer a m ())
r -> (a, Producer a m ())
r
BedEntry
-> a
-> Producer BedEntry m ()
-> Producer a m ()
-> Producer a m ()
forall (m :: * -> *) x' x.
Monad m =>
BedEntry
-> a
-> Proxy X () () BedEntry m ()
-> Proxy X () () a m ()
-> Proxy x' x () a m ()
go BedEntry
bedCurrent a
gCurrent Producer BedEntry m ()
bedRest Producer a m ()
gRest
where
go :: BedEntry
-> a
-> Proxy X () () BedEntry m ()
-> Proxy X () () a m ()
-> Proxy x' x () a m ()
go BedEntry
bedCurrent a
gCurrent Proxy X () () BedEntry m ()
bedRest Proxy X () () a m ()
gRest = do
let recurseNextBed :: Proxy x' x () a m ()
recurseNextBed = do
Either () (BedEntry, Proxy X () () BedEntry m ())
b <- m (Either () (BedEntry, Proxy X () () BedEntry m ()))
-> Proxy
x' x () a m (Either () (BedEntry, Proxy X () () BedEntry m ()))
forall (t :: (* -> *) -> * -> *) (m :: * -> *) a.
(MonadTrans t, Monad m) =>
m a -> t m a
lift (m (Either () (BedEntry, Proxy X () () BedEntry m ()))
-> Proxy
x' x () a m (Either () (BedEntry, Proxy X () () BedEntry m ())))
-> m (Either () (BedEntry, Proxy X () () BedEntry m ()))
-> Proxy
x' x () a m (Either () (BedEntry, Proxy X () () BedEntry m ()))
forall a b. (a -> b) -> a -> b
$ Proxy X () () BedEntry m ()
-> m (Either () (BedEntry, Proxy X () () BedEntry m ()))
forall (m :: * -> *) a r.
Monad m =>
Producer a m r -> m (Either r (a, Producer a m r))
next Proxy X () () BedEntry m ()
bedRest
case Either () (BedEntry, Proxy X () () BedEntry m ())
b of
Left () -> () -> Proxy x' x () a m ()
forall (m :: * -> *) a. Monad m => a -> m a
return ()
Right (BedEntry
nextBed, Proxy X () () BedEntry m ()
bedRest') -> BedEntry
-> a
-> Proxy X () () BedEntry m ()
-> Proxy X () () a m ()
-> Proxy x' x () a m ()
go BedEntry
nextBed a
gCurrent Proxy X () () BedEntry m ()
bedRest' Proxy X () () a m ()
gRest
recurseNextG :: Proxy x' x () a m ()
recurseNextG = do
Either () (a, Proxy X () () a m ())
f' <- m (Either () (a, Proxy X () () a m ()))
-> Proxy x' x () a m (Either () (a, Proxy X () () a m ()))
forall (t :: (* -> *) -> * -> *) (m :: * -> *) a.
(MonadTrans t, Monad m) =>
m a -> t m a
lift (m (Either () (a, Proxy X () () a m ()))
-> Proxy x' x () a m (Either () (a, Proxy X () () a m ())))
-> m (Either () (a, Proxy X () () a m ()))
-> Proxy x' x () a m (Either () (a, Proxy X () () a m ()))
forall a b. (a -> b) -> a -> b
$ Proxy X () () a m () -> m (Either () (a, Proxy X () () a m ()))
forall (m :: * -> *) a r.
Monad m =>
Producer a m r -> m (Either r (a, Producer a m r))
next Proxy X () () a m ()
gRest
case Either () (a, Proxy X () () a m ())
f' of
Left () -> () -> Proxy x' x () a m ()
forall (m :: * -> *) a. Monad m => a -> m a
return ()
Right (a
nextG, Proxy X () () a m ()
gRest') -> BedEntry
-> a
-> Proxy X () () BedEntry m ()
-> Proxy X () () a m ()
-> Proxy x' x () a m ()
go BedEntry
bedCurrent a
nextG Proxy X () () BedEntry m ()
bedRest Proxy X () () a m ()
gRest'
case BedEntry
bedCurrent BedEntry -> a -> IntervalStatus
`checkIntervalStatus` a
gCurrent of
IntervalStatus
BedBehind -> Proxy x' x () a m ()
recurseNextBed
IntervalStatus
BedAhead -> Proxy x' x () a m ()
recurseNextG
IntervalStatus
BedOn -> do
a -> Proxy x' x () a m ()
forall (m :: * -> *) a x' x. Functor m => a -> Proxy x' x () a m ()
yield a
gCurrent
Proxy x' x () a m ()
recurseNextG
checkIntervalStatus :: BedEntry -> a -> IntervalStatus
checkIntervalStatus (BedEntry Chrom
bedChrom Int
bedStart Int
bedEnd) a
g =
case Chrom
bedChrom Chrom -> Chrom -> Ordering
forall a. Ord a => a -> a -> Ordering
`compare` ((Chrom, Int) -> Chrom
forall a b. (a, b) -> a
fst ((Chrom, Int) -> Chrom) -> (a -> (Chrom, Int)) -> a -> Chrom
forall b c a. (b -> c) -> (a -> b) -> a -> c
. a -> (Chrom, Int)
posFunc) a
g of
Ordering
LT -> IntervalStatus
BedBehind
Ordering
GT -> IntervalStatus
BedAhead
Ordering
EQ -> if Int
bedStart Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1 Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
> ((Chrom, Int) -> Int
forall a b. (a, b) -> b
snd ((Chrom, Int) -> Int) -> (a -> (Chrom, Int)) -> a -> Int
forall b c a. (b -> c) -> (a -> b) -> a -> c
. a -> (Chrom, Int)
posFunc) a
g then
IntervalStatus
BedAhead
else
if Int
bedEnd Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< ((Chrom, Int) -> Int
forall a b. (a, b) -> b
snd ((Chrom, Int) -> Int) -> (a -> (Chrom, Int)) -> a -> Int
forall b c a. (b -> c) -> (a -> b) -> a -> c
. a -> (Chrom, Int)
posFunc) a
g then IntervalStatus
BedBehind else IntervalStatus
BedOn