module SequenceFormats.Bed where
import SequenceFormats.Utils (Chrom (..), consumeProducer,
readFileProd)
import Control.Monad.Trans.Class (lift)
import qualified Data.Attoparsec.ByteString.Char8 as A
import Data.Char (isSpace)
import Pipes (Producer, next, yield)
import Pipes.Safe (MonadSafe)
data BedEntry = BedEntry Chrom Int Int
deriving (Int -> BedEntry -> ShowS
[BedEntry] -> ShowS
BedEntry -> String
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [BedEntry] -> ShowS
$cshowList :: [BedEntry] -> ShowS
show :: BedEntry -> String
$cshow :: BedEntry -> String
showsPrec :: Int -> BedEntry -> ShowS
$cshowsPrec :: Int -> BedEntry -> ShowS
Show, BedEntry -> BedEntry -> Bool
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: BedEntry -> BedEntry -> Bool
$c/= :: BedEntry -> BedEntry -> Bool
== :: BedEntry -> BedEntry -> Bool
$c== :: BedEntry -> BedEntry -> Bool
Eq)
bedFileParser :: A.Parser BedEntry
bedFileParser :: Parser BedEntry
bedFileParser = Chrom -> Int -> Int -> BedEntry
BedEntry forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Parser ByteString Chrom
chrom forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* Parser ()
A.skipSpace forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> forall a. Integral a => Parser a
A.decimal forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* Parser ()
A.skipSpace forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> forall a. Integral a => Parser a
A.decimal forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* Parser ()
A.endOfLine
where
chrom :: Parser ByteString Chrom
chrom = ByteString -> Chrom
Chrom forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> (Char -> Bool) -> Parser ByteString
A.takeTill Char -> Bool
isSpace
readBedFile :: (MonadSafe m) => FilePath -> Producer BedEntry m ()
readBedFile :: forall (m :: * -> *).
MonadSafe m =>
String -> Producer BedEntry m ()
readBedFile String
bedFile = forall (m :: * -> *) a.
MonadThrow m =>
Parser a -> Producer ByteString m () -> Producer a m ()
consumeProducer Parser BedEntry
bedFileParser (forall (m :: * -> *).
MonadSafe m =>
String -> Producer ByteString m ()
readFileProd String
bedFile)
data IntervalStatus = BedBehind
| BedOn
| BedAhead
filterThroughBed :: (Monad m) => Producer BedEntry m () -> (a -> (Chrom, Int)) -> Producer a m () -> Producer a m ()
filterThroughBed :: forall (m :: * -> *) a.
Monad m =>
Producer BedEntry m ()
-> (a -> (Chrom, Int)) -> Producer a m () -> Producer a m ()
filterThroughBed Producer BedEntry m ()
bedProd a -> (Chrom, Int)
posFunc Producer a m ()
gProd = do
Either () (BedEntry, Producer BedEntry m ())
b <- forall (t :: (* -> *) -> * -> *) (m :: * -> *) a.
(MonadTrans t, Monad m) =>
m a -> t m a
lift forall a b. (a -> b) -> a -> b
$ forall (m :: * -> *) a r.
Monad m =>
Producer a m r -> m (Either r (a, Producer a m r))
next Producer BedEntry m ()
bedProd
let (BedEntry
bedCurrent, Producer BedEntry m ()
bedRest) = case Either () (BedEntry, Producer BedEntry m ())
b of
Left ()
_ -> forall a. HasCallStack => String -> a
error String
"Bed file empty or not readable"
Right (BedEntry, Producer BedEntry m ())
r -> (BedEntry, Producer BedEntry m ())
r
Either () (a, Producer a m ())
f' <- forall (t :: (* -> *) -> * -> *) (m :: * -> *) a.
(MonadTrans t, Monad m) =>
m a -> t m a
lift forall a b. (a -> b) -> a -> b
$ forall (m :: * -> *) a r.
Monad m =>
Producer a m r -> m (Either r (a, Producer a m r))
next Producer a m ()
gProd
let (a
gCurrent, Producer a m ()
gRest) = case Either () (a, Producer a m ())
f' of
Left ()
_ -> forall a. HasCallStack => String -> a
error String
"Genomic stream empty or not readable"
Right (a, Producer a m ())
r -> (a, Producer a m ())
r
forall {m :: * -> *} {x'} {x}.
Monad m =>
BedEntry
-> a
-> Producer BedEntry m ()
-> Producer a m ()
-> Proxy x' x () a m ()
go BedEntry
bedCurrent a
gCurrent Producer BedEntry m ()
bedRest Producer a m ()
gRest
where
go :: BedEntry
-> a
-> Producer BedEntry m ()
-> Producer a m ()
-> Proxy x' x () a m ()
go BedEntry
bedCurrent a
gCurrent Producer BedEntry m ()
bedRest Producer a m ()
gRest = do
let recurseNextBed :: Proxy x' x () a m ()
recurseNextBed = do
Either () (BedEntry, Producer BedEntry m ())
b <- forall (t :: (* -> *) -> * -> *) (m :: * -> *) a.
(MonadTrans t, Monad m) =>
m a -> t m a
lift forall a b. (a -> b) -> a -> b
$ forall (m :: * -> *) a r.
Monad m =>
Producer a m r -> m (Either r (a, Producer a m r))
next Producer BedEntry m ()
bedRest
case Either () (BedEntry, Producer BedEntry m ())
b of
Left () -> forall (m :: * -> *) a. Monad m => a -> m a
return ()
Right (BedEntry
nextBed, Producer BedEntry m ()
bedRest') -> BedEntry
-> a
-> Producer BedEntry m ()
-> Producer a m ()
-> Proxy x' x () a m ()
go BedEntry
nextBed a
gCurrent Producer BedEntry m ()
bedRest' Producer a m ()
gRest
recurseNextG :: Proxy x' x () a m ()
recurseNextG = do
Either () (a, Producer a m ())
f' <- forall (t :: (* -> *) -> * -> *) (m :: * -> *) a.
(MonadTrans t, Monad m) =>
m a -> t m a
lift forall a b. (a -> b) -> a -> b
$ forall (m :: * -> *) a r.
Monad m =>
Producer a m r -> m (Either r (a, Producer a m r))
next Producer a m ()
gRest
case Either () (a, Producer a m ())
f' of
Left () -> forall (m :: * -> *) a. Monad m => a -> m a
return ()
Right (a
nextG, Producer a m ()
gRest') -> BedEntry
-> a
-> Producer BedEntry m ()
-> Producer a m ()
-> Proxy x' x () a m ()
go BedEntry
bedCurrent a
nextG Producer BedEntry m ()
bedRest Producer a m ()
gRest'
case BedEntry
bedCurrent BedEntry -> a -> IntervalStatus
`checkIntervalStatus` a
gCurrent of
IntervalStatus
BedBehind -> Proxy x' x () a m ()
recurseNextBed
IntervalStatus
BedAhead -> Proxy x' x () a m ()
recurseNextG
IntervalStatus
BedOn -> do
forall (m :: * -> *) a x' x. Functor m => a -> Proxy x' x () a m ()
yield a
gCurrent
Proxy x' x () a m ()
recurseNextG
checkIntervalStatus :: BedEntry -> a -> IntervalStatus
checkIntervalStatus (BedEntry Chrom
bedChrom Int
bedStart Int
bedEnd) a
g =
case Chrom
bedChrom forall a. Ord a => a -> a -> Ordering
`compare` (forall a b. (a, b) -> a
fst forall b c a. (b -> c) -> (a -> b) -> a -> c
. a -> (Chrom, Int)
posFunc) a
g of
Ordering
LT -> IntervalStatus
BedBehind
Ordering
GT -> IntervalStatus
BedAhead
Ordering
EQ -> if Int
bedStart forall a. Num a => a -> a -> a
+ Int
1 forall a. Ord a => a -> a -> Bool
> (forall a b. (a, b) -> b
snd forall b c a. (b -> c) -> (a -> b) -> a -> c
. a -> (Chrom, Int)
posFunc) a
g then
IntervalStatus
BedAhead
else
if Int
bedEnd forall a. Ord a => a -> a -> Bool
< (forall a b. (a, b) -> b
snd forall b c a. (b -> c) -> (a -> b) -> a -> c
. a -> (Chrom, Int)
posFunc) a
g then IntervalStatus
BedBehind else IntervalStatus
BedOn